From 02ae5136532c97c7e3587a80da10af06608d3355 Mon Sep 17 00:00:00 2001 From: Petr Pavlu <petr.pavlu@dagobah.cz> Date: Sat, 25 Feb 2023 16:33:00 +0100 Subject: [PATCH] Add RISCV64 support Code taken from https://github.com/petrpavlu/valgrind-riscv64 Some minor adjustments have been made because that code is based on upstream libvex. --- auxprogs/genoffsets.c | 36 + common.mk | 7 + priv/guest_riscv64_defs.h | 136 ++ priv/guest_riscv64_helpers.c | 475 +++++ priv/guest_riscv64_toIR.c | 3511 ++++++++++++++++++++++++++++++++++ priv/host_riscv64_defs.c | 2701 ++++++++++++++++++++++++++ priv/host_riscv64_defs.h | 644 +++++++ priv/host_riscv64_isel.c | 2087 ++++++++++++++++++++ priv/main_main.c | 101 +- priv/main_util.h | 9 + pub/libvex.h | 3 +- pub/libvex_guest_riscv64.h | 148 ++ pub/libvex_ir.h | 3 +- 13 files changed, 9849 insertions(+), 12 deletions(-) create mode 100644 priv/guest_riscv64_defs.h create mode 100644 priv/guest_riscv64_helpers.c create mode 100644 priv/guest_riscv64_toIR.c create mode 100644 priv/host_riscv64_defs.c create mode 100644 priv/host_riscv64_defs.h create mode 100644 priv/host_riscv64_isel.c create mode 100644 pub/libvex_guest_riscv64.h diff --git a/auxprogs/genoffsets.c b/auxprogs/genoffsets.c index a4f4b9f86..046ab7ce1 100644 --- a/auxprogs/genoffsets.c +++ b/auxprogs/genoffsets.c @@ -51,6 +51,7 @@ #include "../pub/libvex_guest_mips32.h" #include "../pub/libvex_guest_mips64.h" #include "../pub/libvex_guest_tilegx.h" +#include "../pub/libvex_guest_riscv64.h" #undef guest_LR @@ -975,6 +976,41 @@ int main(int argc, char **argv) GENOFFSET(TILEGX,tilegx,ex_context_0); GENOFFSET(TILEGX,tilegx,ex_context_1); GENOFFSET(TILEGX,tilegx,COND); + + // riscv64 + GENOFFSET(RISCV64,riscv64,x0); + GENOFFSET(RISCV64,riscv64,x1); + GENOFFSET(RISCV64,riscv64,x2); + GENOFFSET(RISCV64,riscv64,x3); + GENOFFSET(RISCV64,riscv64,x4); + GENOFFSET(RISCV64,riscv64,x5); + GENOFFSET(RISCV64,riscv64,x6); + GENOFFSET(RISCV64,riscv64,x7); + GENOFFSET(RISCV64,riscv64,x8); + GENOFFSET(RISCV64,riscv64,x9); + GENOFFSET(RISCV64,riscv64,x10); + GENOFFSET(RISCV64,riscv64,x11); + GENOFFSET(RISCV64,riscv64,x12); + GENOFFSET(RISCV64,riscv64,x13); + GENOFFSET(RISCV64,riscv64,x14); + GENOFFSET(RISCV64,riscv64,x15); + GENOFFSET(RISCV64,riscv64,x16); + GENOFFSET(RISCV64,riscv64,x17); + GENOFFSET(RISCV64,riscv64,x18); + GENOFFSET(RISCV64,riscv64,x19); + GENOFFSET(RISCV64,riscv64,x20); + GENOFFSET(RISCV64,riscv64,x21); + GENOFFSET(RISCV64,riscv64,x22); + GENOFFSET(RISCV64,riscv64,x23); + GENOFFSET(RISCV64,riscv64,x24); + GENOFFSET(RISCV64,riscv64,x25); + GENOFFSET(RISCV64,riscv64,x26); + GENOFFSET(RISCV64,riscv64,x27); + GENOFFSET(RISCV64,riscv64,x28); + GENOFFSET(RISCV64,riscv64,x29); + GENOFFSET(RISCV64,riscv64,x30); + GENOFFSET(RISCV64,riscv64,x31); + GENOFFSET(RISCV64,riscv64,pc); } /*--------------------------------------------------------------------*/ diff --git a/common.mk b/common.mk index 5bbeed7d8..9085aeff9 100644 --- a/common.mk +++ b/common.mk @@ -18,6 +18,7 @@ PUB_HEADERS = \ pub/libvex_guest_arm.h \ pub/libvex_guest_ppc32.h \ pub/libvex_guest_ppc64.h \ + pub/libvex_guest_riscv64.h \ pub/libvex_guest_s390x.h \ pub/libvex_s390x_common.h \ pub/libvex_guest_mips32.h @@ -29,6 +30,7 @@ PRIV_HEADERS = \ priv/host_amd64_defs.h \ priv/host_arm_defs.h \ priv/host_ppc_defs.h \ + priv/host_riscv64_defs.h \ priv/host_s390_defs.h \ priv/host_mips_defs.h \ priv/host_generic_maddf.h \ @@ -44,6 +46,7 @@ PRIV_HEADERS = \ priv/guest_amd64_defs.h \ priv/guest_arm_defs.h \ priv/guest_ppc_defs.h \ + priv/guest_riscv64_defs.h \ 
priv/guest_mips_defs.h \ priv/s390_disasm.h \ priv/s390_defs.h \ @@ -63,6 +66,7 @@ NORMAL_OBJS = \ priv/host_arm_defs.o \ priv/host_arm64_defs.o \ priv/host_ppc_defs.o \ + priv/host_riscv64_defs.o \ priv/host_s390_defs.o \ priv/host_mips_defs.o \ priv/host_x86_isel.o \ @@ -70,6 +74,7 @@ NORMAL_OBJS = \ priv/host_arm_isel.o \ priv/host_arm64_isel.o \ priv/host_ppc_isel.o \ + priv/host_riscv64_isel.o \ priv/host_s390_isel.o \ priv/host_mips_isel.o \ priv/host_generic_maddf.o \ @@ -86,6 +91,7 @@ NORMAL_OBJS = \ priv/guest_arm_helpers.o \ priv/guest_arm64_helpers.o \ priv/guest_ppc_helpers.o \ + priv/guest_riscv64_helpers.o \ priv/guest_s390_helpers.o \ priv/guest_mips_helpers.o \ priv/guest_x86_toIR.o \ @@ -93,6 +99,7 @@ NORMAL_OBJS = \ priv/guest_arm_toIR.o \ priv/guest_arm64_toIR.o \ priv/guest_ppc_toIR.o \ + priv/guest_riscv64_toIR.o \ priv/guest_s390_toIR.o \ priv/guest_mips_toIR.o diff --git a/priv/guest_riscv64_defs.h b/priv/guest_riscv64_defs.h new file mode 100644 index 000000000..ee5435e14 --- /dev/null +++ b/priv/guest_riscv64_defs.h @@ -0,0 +1,136 @@ + +/*--------------------------------------------------------------------*/ +/*--- begin guest_riscv64_defs.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2020-2023 Petr Pavlu + petr.pavlu@dagobah.cz + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see . + + The GNU General Public License is contained in the file COPYING. + + Neither the names of the U.S. Department of Energy nor the + University of California nor the names of its contributors may be + used to endorse or promote products derived from this software + without prior written permission. +*/ + +/* Only to be used within the guest_riscv64_* files. */ + +#ifndef __VEX_GUEST_RISCV64_DEFS_H +#define __VEX_GUEST_RISCV64_DEFS_H + +#include "libvex_basictypes.h" + +#include "guest_generic_bb_to_IR.h" + +/*------------------------------------------------------------*/ +/*--- riscv64 to IR conversion ---*/ +/*------------------------------------------------------------*/ + +/* Convert one riscv64 insn to IR. See the type DisOneInstrFn in + guest_generic_bb_to_IR.h. */ +DisResult disInstr_RISCV64(IRSB* irbb, + const UChar* guest_code, + Long delta, + Addr guest_IP, + VexArch guest_arch, + const VexArchInfo* archinfo, + const VexAbiInfo* abiinfo, + VexEndness host_endness, + Bool sigill_diag); + +/* Used by the optimiser to specialise calls to helpers. */ +IRExpr* guest_riscv64_spechelper(const HChar* function_name, + IRExpr** args, + IRStmt** precedingStmts, + Int n_precedingStmts); + +/* Describes to the optimiser which part of the guest state require precise + memory exceptions. This is logically part of the guest state description. 
*/ +Bool guest_riscv64_state_requires_precise_mem_exns( + Int minoff, Int maxoff, VexRegisterUpdates pxControl); + +extern VexGuestLayout riscv64guest_layout; + +/*------------------------------------------------------------*/ +/*--- riscv64 guest helpers ---*/ +/*------------------------------------------------------------*/ + +/* --- CLEAN HELPERS --- */ + +/* Calculate resulting flags of a specified floating-point operation. Returns + a 32-bit value where bits 4:0 contain the fflags in the RISC-V native + format (NV DZ OF UF NX) and remaining upper bits are zero. */ +UInt riscv64g_calculate_fflags_fsqrt_s(Float a1, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fcvt_w_s(Float a1, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fcvt_wu_s(Float a1, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fcvt_s_w(UInt a1, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fcvt_s_wu(UInt a1, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fcvt_l_s(Float a1, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fcvt_lu_s(Float a1, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fcvt_s_l(ULong a1, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fcvt_s_lu(ULong a1, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fsqrt_d(Double a1, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fcvt_s_d(Double a1, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fcvt_w_d(Double a1, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fcvt_wu_d(Double a1, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fcvt_l_d(Double a1, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fcvt_lu_d(Double a1, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fcvt_d_l(ULong a1, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fcvt_d_lu(ULong a1, UInt rm_RISCV); + +UInt riscv64g_calculate_fflags_fadd_s(Float a1, Float a2, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fmul_s(Float a1, Float a2, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fdiv_s(Float a1, Float a2, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fadd_d(Double a1, Double a2, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fmul_d(Double a1, Double a2, UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fdiv_d(Double a1, Double a2, UInt rm_RISCV); + +UInt riscv64g_calculate_fflags_fmin_s(Float a1, Float a2); +UInt riscv64g_calculate_fflags_fmax_s(Float a1, Float a2); +UInt riscv64g_calculate_fflags_feq_s(Float a1, Float a2); +UInt riscv64g_calculate_fflags_flt_s(Float a1, Float a2); +UInt riscv64g_calculate_fflags_fle_s(Float a1, Float a2); +UInt riscv64g_calculate_fflags_fmin_d(Double a1, Double a2); +UInt riscv64g_calculate_fflags_fmax_d(Double a1, Double a2); +UInt riscv64g_calculate_fflags_feq_d(Double a1, Double a2); +UInt riscv64g_calculate_fflags_flt_d(Double a1, Double a2); +UInt riscv64g_calculate_fflags_fle_d(Double a1, Double a2); + +UInt riscv64g_calculate_fflags_fmadd_s(Float a1, + Float a2, + Float a3, + UInt rm_RISCV); +UInt riscv64g_calculate_fflags_fmadd_d(Double a1, + Double a2, + Double a3, + UInt rm_RISCV); + +/* Calculate floating-point class. Returns a 64-bit value where bits 9:0 + contains the properties in the RISC-V FCLASS-instruction format and remaining + upper bits are zero. 
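+   For reference, the meaning of those bits (per the RISC-V F/D extensions;
+   exactly one bit is set in the result) is:
+      bit 0 - negative infinity     bit 5 - positive subnormal
+      bit 1 - negative normal       bit 6 - positive normal
+      bit 2 - negative subnormal    bit 7 - positive infinity
+      bit 3 - negative zero         bit 8 - signaling NaN
+      bit 4 - positive zero         bit 9 - quiet NaN
+   A quiet NaN input, for instance, yields 1 << 9 = 0x200.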
*/ +ULong riscv64g_calculate_fclass_s(Float a1); +ULong riscv64g_calculate_fclass_d(Double a1); + +#endif /* ndef __VEX_GUEST_RISCV64_DEFS_H */ + +/*--------------------------------------------------------------------*/ +/*--- end guest_riscv64_defs.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/priv/guest_riscv64_helpers.c b/priv/guest_riscv64_helpers.c new file mode 100644 index 000000000..e65b14676 --- /dev/null +++ b/priv/guest_riscv64_helpers.c @@ -0,0 +1,475 @@ + +/*--------------------------------------------------------------------*/ +/*--- begin guest_riscv64_helpers.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2020-2023 Petr Pavlu + petr.pavlu@dagobah.cz + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see . + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "libvex_guest_riscv64.h" + +#include "guest_riscv64_defs.h" +#include "main_util.h" + +/* This file contains helper functions for riscv64 guest code. Calls to these + functions are generated by the back end. These calls are of course in the + host machine code and this file will be compiled to host machine code, so + that all makes sense. + + Only change the signatures of these helper functions very carefully. If you + change the signature here, you'll have to change the parameters passed to it + in the IR calls constructed by guest_riscv64_toIR.c. + + The convention used is that all functions called from generated code are + named riscv64g_, and any function whose name lacks that prefix is + not called from generated code. Note that some LibVEX_* functions can however + be called by VEX's client, but that is not the same as calling them from + VEX-generated code. 
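+   As a rough sketch of that binding (the argument temporaries arg1, arg2 and
+   rm are placeholders only), guest_riscv64_toIR.c builds a clean-helper call
+   to one of the fflags functions roughly as follows, with regparms set to 0:
+
+      IRExpr** args = mkIRExprVec_3(mkexpr(arg1), mkexpr(arg2), mkexpr(rm));
+      IRExpr* fflags = mkIRExprCCall(Ity_I32, 0,
+                                     "riscv64g_calculate_fflags_fadd_s",
+                                     &riscv64g_calculate_fflags_fadd_s, args);
+
+   The string name and the function address must refer to the same helper,
+   which is why a signature change here needs a matching change on the toIR
+   side.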
+*/ + +#if defined(__riscv) && (__riscv_xlen == 64) +/* clang-format off */ +#define CALCULATE_FFLAGS_UNARY64_F(inst) \ + do { \ + UInt res; \ + __asm__ __volatile__( \ + "csrr t0, fcsr\n\t" \ + "csrw frm, %[rm]\n\t" \ + "csrw fflags, zero\n\t" \ + inst " ft0, %[a1]\n\t" \ + "csrr %[res], fflags\n\t" \ + "csrw fcsr, t0\n\t" \ + : [res] "=r"(res) \ + : [a1] "f"(a1), [rm] "r"(rm_RISCV) \ + : "t0", "ft0"); \ + return res; \ + } while (0) +#define CALCULATE_FFLAGS_UNARY64_IF(inst) \ + do { \ + UInt res; \ + __asm__ __volatile__( \ + "csrr t0, fcsr\n\t" \ + "csrw frm, %[rm]\n\t" \ + "csrw fflags, zero\n\t" \ + inst " t1, %[a1]\n\t" \ + "csrr %[res], fflags\n\t" \ + "csrw fcsr, t0\n\t" \ + : [res] "=r"(res) \ + : [a1] "f"(a1), [rm] "r"(rm_RISCV) \ + : "t0", "t1"); \ + return res; \ + } while (0) +#define CALCULATE_FFLAGS_UNARY64_FI(inst) \ + do { \ + UInt res; \ + __asm__ __volatile__( \ + "csrr t0, fcsr\n\t" \ + "csrw frm, %[rm]\n\t" \ + "csrw fflags, zero\n\t" \ + inst " ft0, %[a1]\n\t" \ + "csrr %[res], fflags\n\t" \ + "csrw fcsr, t0\n\t" \ + : [res] "=r"(res) \ + : [a1] "r"(a1), [rm] "r"(rm_RISCV) \ + : "t0", "ft0"); \ + return res; \ + } while (0) +/* clang-format on */ +#else +/* No simulated version is currently implemented. */ +#define CALCULATE_FFLAGS_UNARY64_F(inst) \ + do { \ + return 0; \ + } while (0) +#define CALCULATE_FFLAGS_UNARY64_IF(inst) \ + do { \ + return 0; \ + } while (0) +#define CALCULATE_FFLAGS_UNARY64_FI(inst) \ + do { \ + return 0; \ + } while (0) +#endif + +/* CALLED FROM GENERATED CODE: CLEAN HELPERS */ +UInt riscv64g_calculate_fflags_fsqrt_s(Float a1, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_UNARY64_F("fsqrt.s"); +} +UInt riscv64g_calculate_fflags_fcvt_w_s(Float a1, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_UNARY64_IF("fcvt.w.s"); +} +UInt riscv64g_calculate_fflags_fcvt_wu_s(Float a1, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_UNARY64_IF("fcvt.wu.s"); +} +UInt riscv64g_calculate_fflags_fcvt_s_w(UInt a1, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_UNARY64_FI("fcvt.s.w"); +} +UInt riscv64g_calculate_fflags_fcvt_s_wu(UInt a1, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_UNARY64_FI("fcvt.s.wu"); +} +UInt riscv64g_calculate_fflags_fcvt_l_s(Float a1, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_UNARY64_IF("fcvt.l.s"); +} +UInt riscv64g_calculate_fflags_fcvt_lu_s(Float a1, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_UNARY64_IF("fcvt.lu.s"); +} +UInt riscv64g_calculate_fflags_fcvt_s_l(ULong a1, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_UNARY64_FI("fcvt.s.l"); +} +UInt riscv64g_calculate_fflags_fcvt_s_lu(ULong a1, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_UNARY64_FI("fcvt.s.lu"); +} +UInt riscv64g_calculate_fflags_fsqrt_d(Double a1, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_UNARY64_F("fsqrt.d"); +} +UInt riscv64g_calculate_fflags_fcvt_s_d(Double a1, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_UNARY64_F("fcvt.s.d"); +} +UInt riscv64g_calculate_fflags_fcvt_w_d(Double a1, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_UNARY64_IF("fcvt.w.d"); +} +UInt riscv64g_calculate_fflags_fcvt_wu_d(Double a1, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_UNARY64_IF("fcvt.wu.d"); +} +UInt riscv64g_calculate_fflags_fcvt_l_d(Double a1, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_UNARY64_IF("fcvt.l.d"); +} +UInt riscv64g_calculate_fflags_fcvt_lu_d(Double a1, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_UNARY64_IF("fcvt.lu.d"); +} +UInt riscv64g_calculate_fflags_fcvt_d_l(ULong a1, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_UNARY64_FI("fcvt.d.l"); +} +UInt riscv64g_calculate_fflags_fcvt_d_lu(ULong a1, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_UNARY64_FI("fcvt.d.lu"); +} + +#if defined(__riscv) && 
(__riscv_xlen == 64) +/* clang-format off */ +#define CALCULATE_FFLAGS_BINARY64(inst) \ + do { \ + UInt res; \ + __asm__ __volatile__( \ + "csrr t0, fcsr\n\t" \ + "csrw frm, %[rm]\n\t" \ + "csrw fflags, zero\n\t" \ + inst " %[a1], %[a1], %[a2]\n\t" \ + "csrr %[res], fflags\n\t" \ + "csrw fcsr, t0\n\t" \ + : [res] "=r"(res) \ + : [a1] "f"(a1), [a2] "f"(a2), [rm] "r"(rm_RISCV) \ + : "t0"); \ + return res; \ + } while (0) +#define CALCULATE_FFLAGS_BINARY64_IFF(inst) \ + do { \ + UInt res; \ + __asm__ __volatile__( \ + "csrr t0, fcsr\n\t" \ + "csrw frm, %[rm]\n\t" \ + "csrw fflags, zero\n\t" \ + inst " t1, %[a1], %[a2]\n\t" \ + "csrr %[res], fflags\n\t" \ + "csrw fcsr, t0\n\t" \ + : [res] "=r"(res) \ + : [a1] "f"(a1), [a2] "f"(a2), [rm] "r"(rm_RISCV) \ + : "t0", "t1"); \ + return res; \ + } while (0) +/* clang-format on */ +#else +/* No simulated version is currently implemented. */ +#define CALCULATE_FFLAGS_BINARY64(inst) \ + do { \ + return 0; \ + } while (0) +#define CALCULATE_FFLAGS_BINARY64_IFF(inst) \ + do { \ + return 0; \ + } while (0) +#endif + +/* CALLED FROM GENERATED CODE: CLEAN HELPERS */ +UInt riscv64g_calculate_fflags_fadd_s(Float a1, Float a2, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_BINARY64("fadd.s"); +} +UInt riscv64g_calculate_fflags_fmul_s(Float a1, Float a2, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_BINARY64("fmul.s"); +} +UInt riscv64g_calculate_fflags_fdiv_s(Float a1, Float a2, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_BINARY64("fdiv.s"); +} +UInt riscv64g_calculate_fflags_fadd_d(Double a1, Double a2, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_BINARY64("fadd.d"); +} +UInt riscv64g_calculate_fflags_fmul_d(Double a1, Double a2, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_BINARY64("fmul.d"); +} +UInt riscv64g_calculate_fflags_fdiv_d(Double a1, Double a2, UInt rm_RISCV) +{ + CALCULATE_FFLAGS_BINARY64("fdiv.d"); +} +UInt riscv64g_calculate_fflags_fmin_s(Float a1, Float a2) +{ + UInt rm_RISCV = 0; /* unused */ + CALCULATE_FFLAGS_BINARY64("fmin.s"); +} +UInt riscv64g_calculate_fflags_fmax_s(Float a1, Float a2) +{ + UInt rm_RISCV = 0; /* unused */ + CALCULATE_FFLAGS_BINARY64("fmax.s"); +} +UInt riscv64g_calculate_fflags_feq_s(Float a1, Float a2) +{ + UInt rm_RISCV = 0; /* unused */ + CALCULATE_FFLAGS_BINARY64_IFF("feq.s"); +} +UInt riscv64g_calculate_fflags_flt_s(Float a1, Float a2) +{ + UInt rm_RISCV = 0; /* unused */ + CALCULATE_FFLAGS_BINARY64_IFF("flt.s"); +} +UInt riscv64g_calculate_fflags_fle_s(Float a1, Float a2) +{ + UInt rm_RISCV = 0; /* unused */ + CALCULATE_FFLAGS_BINARY64_IFF("fle.s"); +} +UInt riscv64g_calculate_fflags_fmin_d(Double a1, Double a2) +{ + UInt rm_RISCV = 0; /* unused */ + CALCULATE_FFLAGS_BINARY64("fmin.d"); +} +UInt riscv64g_calculate_fflags_fmax_d(Double a1, Double a2) +{ + UInt rm_RISCV = 0; /* unused */ + CALCULATE_FFLAGS_BINARY64("fmax.d"); +} +UInt riscv64g_calculate_fflags_feq_d(Double a1, Double a2) +{ + UInt rm_RISCV = 0; /* unused */ + CALCULATE_FFLAGS_BINARY64_IFF("feq.d"); +} +UInt riscv64g_calculate_fflags_flt_d(Double a1, Double a2) +{ + UInt rm_RISCV = 0; /* unused */ + CALCULATE_FFLAGS_BINARY64_IFF("flt.d"); +} +UInt riscv64g_calculate_fflags_fle_d(Double a1, Double a2) +{ + UInt rm_RISCV = 0; /* unused */ + CALCULATE_FFLAGS_BINARY64_IFF("fle.d"); +} + +#if defined(__riscv) && (__riscv_xlen == 64) +/* clang-format off */ +#define CALCULATE_FFLAGS_TERNARY64(inst) \ + do { \ + UInt res; \ + __asm__ __volatile__( \ + "csrr t0, fcsr\n\t" \ + "csrw frm, %[rm]\n\t" \ + "csrw fflags, zero\n\t" \ + inst " %[a1], %[a1], %[a2], %[a3]\n\t" \ + "csrr %[res], fflags\n\t" \ + 
"csrw fcsr, t0\n\t" \ + : [res] "=r"(res) \ + : [a1] "f"(a1), [a2] "f"(a2), [a3] "f"(a3), [rm] "r"(rm_RISCV) \ + : "t0"); \ + return res; \ + } while (0) +/* clang-format on */ +#else +/* No simulated version is currently implemented. */ +#define CALCULATE_FFLAGS_TERNARY64(inst) \ + do { \ + return 0; \ + } while (0) +#endif + +/* CALLED FROM GENERATED CODE: CLEAN HELPERS */ +UInt riscv64g_calculate_fflags_fmadd_s(Float a1, + Float a2, + Float a3, + UInt rm_RISCV) +{ + CALCULATE_FFLAGS_TERNARY64("fmadd.s"); +} +UInt riscv64g_calculate_fflags_fmadd_d(Double a1, + Double a2, + Double a3, + UInt rm_RISCV) +{ + CALCULATE_FFLAGS_TERNARY64("fmadd.d"); +} + +#if defined(__riscv) && (__riscv_xlen == 64) +/* clang-format off */ +#define CALCULATE_FCLASS(inst) \ + do { \ + ULong res; \ + __asm__ __volatile__( \ + inst " %[res], %[a1]\n\t" \ + : [res] "=r"(res) \ + : [a1] "f"(a1)); \ + return res; \ + } while (0) +/* clang-format on */ +#else +/* No simulated version is currently implemented. */ +#define CALCULATE_FCLASS(inst) \ + do { \ + return 0; \ + } while (0) +#endif + +/* CALLED FROM GENERATED CODE: CLEAN HELPERS */ +ULong riscv64g_calculate_fclass_s(Float a1) { CALCULATE_FCLASS("fclass.s"); } +ULong riscv64g_calculate_fclass_d(Double a1) { CALCULATE_FCLASS("fclass.d"); } + +/*------------------------------------------------------------*/ +/*--- Flag-helpers translation-time function specialisers. ---*/ +/*--- These help iropt specialise calls the above run-time ---*/ +/*--- flags functions. ---*/ +/*------------------------------------------------------------*/ + +IRExpr* guest_riscv64_spechelper(const HChar* function_name, + IRExpr** args, + IRStmt** precedingStmts, + Int n_precedingStmts) +{ + return NULL; +} + +/*------------------------------------------------------------*/ +/*--- Helpers for dealing with, and describing, guest ---*/ +/*--- state as a whole. ---*/ +/*------------------------------------------------------------*/ + +/* Initialise the entire riscv64 guest state. */ +/* VISIBLE TO LIBVEX CLIENT */ +void LibVEX_GuestRISCV64_initialise(/*OUT*/ VexGuestRISCV64State* vex_state) +{ + vex_bzero(vex_state, sizeof(*vex_state)); +} + +/* Figure out if any part of the guest state contained in minoff .. maxoff + requires precise memory exceptions. If in doubt return True (but this + generates significantly slower code). + + By default we enforce precise exns for guest x2 (sp), x8 (fp) and pc only. + These are the minimum needed to extract correct stack backtraces from riscv64 + code. + + Only x2 (sp) is needed in mode VexRegUpdSpAtMemAccess. +*/ +Bool guest_riscv64_state_requires_precise_mem_exns(Int minoff, + Int maxoff, + VexRegisterUpdates pxControl) +{ + Int fp_min = offsetof(VexGuestRISCV64State, guest_x8); + Int fp_max = fp_min + 8 - 1; + Int sp_min = offsetof(VexGuestRISCV64State, guest_x2); + Int sp_max = sp_min + 8 - 1; + Int pc_min = offsetof(VexGuestRISCV64State, guest_pc); + Int pc_max = pc_min + 8 - 1; + + if (maxoff < sp_min || minoff > sp_max) { + /* No overlap with sp. */ + if (pxControl == VexRegUpdSpAtMemAccess) + return False; /* We only need to check stack pointer. */ + } else + return True; + + if (maxoff < fp_min || minoff > fp_max) { + /* No overlap with fp. */ + } else + return True; + + if (maxoff < pc_min || minoff > pc_max) { + /* No overlap with pc. 
*/ + } else + return True; + + return False; +} + +#define ALWAYSDEFD(field) \ + { \ + offsetof(VexGuestRISCV64State, field), \ + (sizeof((VexGuestRISCV64State*)0)->field) \ + } + +VexGuestLayout riscv64guest_layout = { + /* Total size of the guest state, in bytes. */ + .total_sizeB = sizeof(VexGuestRISCV64State), + + /* Describe the stack pointer. */ + .offset_SP = offsetof(VexGuestRISCV64State, guest_x2), + .sizeof_SP = 8, + + /* Describe the frame pointer. */ + .offset_FP = offsetof(VexGuestRISCV64State, guest_x8), + .sizeof_FP = 8, + + /* Describe the instruction pointer. */ + .offset_IP = offsetof(VexGuestRISCV64State, guest_pc), + .sizeof_IP = 8, + + /* Describe any sections to be regarded by Memcheck as 'always-defined'. */ + .n_alwaysDefd = 6, + + .alwaysDefd = { + /* 0 */ ALWAYSDEFD(guest_x0), + /* 1 */ ALWAYSDEFD(guest_pc), + /* 2 */ ALWAYSDEFD(guest_EMNOTE), + /* 3 */ ALWAYSDEFD(guest_CMSTART), + /* 4 */ ALWAYSDEFD(guest_CMLEN), + /* 5 */ ALWAYSDEFD(guest_NRADDR), + }, +}; + +/*--------------------------------------------------------------------*/ +/*--- end guest_riscv64_helpers.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/priv/guest_riscv64_toIR.c b/priv/guest_riscv64_toIR.c new file mode 100644 index 000000000..93ea5a173 --- /dev/null +++ b/priv/guest_riscv64_toIR.c @@ -0,0 +1,3511 @@ + +/*--------------------------------------------------------------------*/ +/*--- begin guest_riscv64_toIR.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2020-2023 Petr Pavlu + petr.pavlu@dagobah.cz + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see . + + The GNU General Public License is contained in the file COPYING. +*/ + +/* Translates riscv64 code to IR. */ + +/* "Special" instructions. + + This instruction decoder can decode four special instructions which mean + nothing natively (are no-ops as far as regs/mem are concerned) but have + meaning for supporting Valgrind. A special instruction is flagged by + a 16-byte preamble: + + 00305013 00d05013 03305013 03d05013 + (srli zero, zero, 3; srli zero, zero, 13 + srli zero, zero, 51; srli zero, zero, 61) + + Following that, one of the following 4 are allowed (standard interpretation + in parentheses): + + 00a56533 (or a0, a0, a0) a3 = client_request ( a4 ) + 00b5e5b3 (or a1, a1, a1) a3 = guest_NRADDR + 00c66633 (or a2, a2, a2) branch-and-link-to-noredir t0 + 00d6e6b3 (or a3, a3, a3) IR injection + + Any other bytes following the 16-byte preamble are illegal and constitute + a failure in instruction decoding. This all assumes that the preamble will + never occur except in specific code fragments designed for Valgrind to catch. 
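+   For illustration only (the client-side macros that actually emit this live
+   in Valgrind's valgrind.h header), a client-request site therefore makes the
+   guest execute a sequence like:
+
+      srli zero, zero, 3
+      srli zero, zero, 13
+      srli zero, zero, 51
+      srli zero, zero, 61
+      or   a0, a0, a0      # with the request args in a4, result placed in a3
+
+   On real hardware the whole sequence has no architectural effect, so the
+   same binary behaves identically when run outside Valgrind.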
+*/ + +#include "libvex_guest_riscv64.h" + +#include "guest_riscv64_defs.h" +#include "main_globals.h" +#include "main_util.h" + +/*------------------------------------------------------------*/ +/*--- Debugging output ---*/ +/*------------------------------------------------------------*/ + +#define DIP(format, args...) \ + do { \ + if (vex_traceflags & VEX_TRACE_FE) \ + vex_printf(format, ##args); \ + } while (0) + +#define DIS(buf, format, args...) \ + do { \ + if (vex_traceflags & VEX_TRACE_FE) \ + vex_sprintf(buf, format, ##args); \ + } while (0) + +/*------------------------------------------------------------*/ +/*--- Helper bits and pieces for deconstructing the ---*/ +/*--- riscv64 insn stream. ---*/ +/*------------------------------------------------------------*/ + +/* Do a little-endian load of a 32-bit word, regardless of the endianness of the + underlying host. */ +static inline UInt getUIntLittleEndianly(const UChar* p) +{ + UInt w = 0; + w = (w << 8) | p[3]; + w = (w << 8) | p[2]; + w = (w << 8) | p[1]; + w = (w << 8) | p[0]; + return w; +} + +/* Do read of an instruction, which can be 16-bit (compressed) or 32-bit in + size. */ +static inline UInt getInsn(const UChar* p) +{ + Bool is_compressed = (p[0] & 0x3) != 0x3; + UInt w = 0; + if (!is_compressed) { + w = (w << 8) | p[3]; + w = (w << 8) | p[2]; + } + w = (w << 8) | p[1]; + w = (w << 8) | p[0]; + return w; +} + +/* Produce _uint[_bMax:_bMin]. */ +#define SLICE_UInt(_uint, _bMax, _bMin) \ + ((((UInt)(_uint)) >> (_bMin)) & \ + (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL)) + +/*------------------------------------------------------------*/ +/*--- Helpers for constructing IR. ---*/ +/*------------------------------------------------------------*/ + +/* Create an expression to produce a 64-bit constant. */ +static IRExpr* mkU64(ULong i) { return IRExpr_Const(IRConst_U64(i)); } + +/* Create an expression to produce a 32-bit constant. */ +static IRExpr* mkU32(UInt i) { return IRExpr_Const(IRConst_U32(i)); } + +/* Create an expression to produce an 8-bit constant. */ +static IRExpr* mkU8(UInt i) +{ + vassert(i < 256); + return IRExpr_Const(IRConst_U8((UChar)i)); +} + +/* Create an expression to read a temporary. */ +static IRExpr* mkexpr(IRTemp tmp) { return IRExpr_RdTmp(tmp); } + +/* Create an unary-operation expression. */ +static IRExpr* unop(IROp op, IRExpr* a) { return IRExpr_Unop(op, a); } + +/* Create a binary-operation expression. */ +static IRExpr* binop(IROp op, IRExpr* a1, IRExpr* a2) +{ + return IRExpr_Binop(op, a1, a2); +} + +/* Create a ternary-operation expression. */ +static IRExpr* triop(IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3) +{ + return IRExpr_Triop(op, a1, a2, a3); +} + +/* Create a quaternary-operation expression. */ +static IRExpr* qop(IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3, IRExpr* a4) +{ + return IRExpr_Qop(op, a1, a2, a3, a4); +} + +/* Create an expression to load a value from memory (in the little-endian + order). */ +static IRExpr* loadLE(IRType ty, IRExpr* addr) +{ + return IRExpr_Load(Iend_LE, ty, addr); +} + +/* Add a statement to the list held by irsb. */ +static void stmt(/*MOD*/ IRSB* irsb, IRStmt* st) { addStmtToIRSB(irsb, st); } + +/* Add a statement to assign a value to a temporary. */ +static void assign(/*MOD*/ IRSB* irsb, IRTemp dst, IRExpr* e) +{ + stmt(irsb, IRStmt_WrTmp(dst, e)); +} + +/* Generate a statement to store a value in memory (in the little-endian + order). 
*/ +static void storeLE(/*MOD*/ IRSB* irsb, IRExpr* addr, IRExpr* data) +{ + stmt(irsb, IRStmt_Store(Iend_LE, addr, data)); +} + +/* Generate a new temporary of the given type. */ +static IRTemp newTemp(/*MOD*/ IRSB* irsb, IRType ty) +{ + vassert(isPlausibleIRType(ty)); + return newIRTemp(irsb->tyenv, ty); +} + +/* Sign-extend a 32/64-bit integer expression to 64 bits. */ +static IRExpr* widenSto64(IRType srcTy, IRExpr* e) +{ + switch (srcTy) { + case Ity_I64: + return e; + case Ity_I32: + return unop(Iop_32Sto64, e); + default: + vpanic("widenSto64(riscv64)"); + } +} + +/* Narrow a 64-bit integer expression to 32/64 bits. */ +static IRExpr* narrowFrom64(IRType dstTy, IRExpr* e) +{ + switch (dstTy) { + case Ity_I64: + return e; + case Ity_I32: + return unop(Iop_64to32, e); + default: + vpanic("narrowFrom64(riscv64)"); + } +} + +/*------------------------------------------------------------*/ +/*--- Offsets of various parts of the riscv64 guest state ---*/ +/*------------------------------------------------------------*/ + +#define OFFB_X0 offsetof(VexGuestRISCV64State, guest_x0) +#define OFFB_X1 offsetof(VexGuestRISCV64State, guest_x1) +#define OFFB_X2 offsetof(VexGuestRISCV64State, guest_x2) +#define OFFB_X3 offsetof(VexGuestRISCV64State, guest_x3) +#define OFFB_X4 offsetof(VexGuestRISCV64State, guest_x4) +#define OFFB_X5 offsetof(VexGuestRISCV64State, guest_x5) +#define OFFB_X6 offsetof(VexGuestRISCV64State, guest_x6) +#define OFFB_X7 offsetof(VexGuestRISCV64State, guest_x7) +#define OFFB_X8 offsetof(VexGuestRISCV64State, guest_x8) +#define OFFB_X9 offsetof(VexGuestRISCV64State, guest_x9) +#define OFFB_X10 offsetof(VexGuestRISCV64State, guest_x10) +#define OFFB_X11 offsetof(VexGuestRISCV64State, guest_x11) +#define OFFB_X12 offsetof(VexGuestRISCV64State, guest_x12) +#define OFFB_X13 offsetof(VexGuestRISCV64State, guest_x13) +#define OFFB_X14 offsetof(VexGuestRISCV64State, guest_x14) +#define OFFB_X15 offsetof(VexGuestRISCV64State, guest_x15) +#define OFFB_X16 offsetof(VexGuestRISCV64State, guest_x16) +#define OFFB_X17 offsetof(VexGuestRISCV64State, guest_x17) +#define OFFB_X18 offsetof(VexGuestRISCV64State, guest_x18) +#define OFFB_X19 offsetof(VexGuestRISCV64State, guest_x19) +#define OFFB_X20 offsetof(VexGuestRISCV64State, guest_x20) +#define OFFB_X21 offsetof(VexGuestRISCV64State, guest_x21) +#define OFFB_X22 offsetof(VexGuestRISCV64State, guest_x22) +#define OFFB_X23 offsetof(VexGuestRISCV64State, guest_x23) +#define OFFB_X24 offsetof(VexGuestRISCV64State, guest_x24) +#define OFFB_X25 offsetof(VexGuestRISCV64State, guest_x25) +#define OFFB_X26 offsetof(VexGuestRISCV64State, guest_x26) +#define OFFB_X27 offsetof(VexGuestRISCV64State, guest_x27) +#define OFFB_X28 offsetof(VexGuestRISCV64State, guest_x28) +#define OFFB_X29 offsetof(VexGuestRISCV64State, guest_x29) +#define OFFB_X30 offsetof(VexGuestRISCV64State, guest_x30) +#define OFFB_X31 offsetof(VexGuestRISCV64State, guest_x31) +#define OFFB_PC offsetof(VexGuestRISCV64State, guest_pc) + +#define OFFB_F0 offsetof(VexGuestRISCV64State, guest_f0) +#define OFFB_F1 offsetof(VexGuestRISCV64State, guest_f1) +#define OFFB_F2 offsetof(VexGuestRISCV64State, guest_f2) +#define OFFB_F3 offsetof(VexGuestRISCV64State, guest_f3) +#define OFFB_F4 offsetof(VexGuestRISCV64State, guest_f4) +#define OFFB_F5 offsetof(VexGuestRISCV64State, guest_f5) +#define OFFB_F6 offsetof(VexGuestRISCV64State, guest_f6) +#define OFFB_F7 offsetof(VexGuestRISCV64State, guest_f7) +#define OFFB_F8 offsetof(VexGuestRISCV64State, guest_f8) +#define OFFB_F9 
offsetof(VexGuestRISCV64State, guest_f9) +#define OFFB_F10 offsetof(VexGuestRISCV64State, guest_f10) +#define OFFB_F11 offsetof(VexGuestRISCV64State, guest_f11) +#define OFFB_F12 offsetof(VexGuestRISCV64State, guest_f12) +#define OFFB_F13 offsetof(VexGuestRISCV64State, guest_f13) +#define OFFB_F14 offsetof(VexGuestRISCV64State, guest_f14) +#define OFFB_F15 offsetof(VexGuestRISCV64State, guest_f15) +#define OFFB_F16 offsetof(VexGuestRISCV64State, guest_f16) +#define OFFB_F17 offsetof(VexGuestRISCV64State, guest_f17) +#define OFFB_F18 offsetof(VexGuestRISCV64State, guest_f18) +#define OFFB_F19 offsetof(VexGuestRISCV64State, guest_f19) +#define OFFB_F20 offsetof(VexGuestRISCV64State, guest_f20) +#define OFFB_F21 offsetof(VexGuestRISCV64State, guest_f21) +#define OFFB_F22 offsetof(VexGuestRISCV64State, guest_f22) +#define OFFB_F23 offsetof(VexGuestRISCV64State, guest_f23) +#define OFFB_F24 offsetof(VexGuestRISCV64State, guest_f24) +#define OFFB_F25 offsetof(VexGuestRISCV64State, guest_f25) +#define OFFB_F26 offsetof(VexGuestRISCV64State, guest_f26) +#define OFFB_F27 offsetof(VexGuestRISCV64State, guest_f27) +#define OFFB_F28 offsetof(VexGuestRISCV64State, guest_f28) +#define OFFB_F29 offsetof(VexGuestRISCV64State, guest_f29) +#define OFFB_F30 offsetof(VexGuestRISCV64State, guest_f30) +#define OFFB_F31 offsetof(VexGuestRISCV64State, guest_f31) +#define OFFB_FCSR offsetof(VexGuestRISCV64State, guest_fcsr) + +#define OFFB_EMNOTE offsetof(VexGuestRISCV64State, guest_EMNOTE) +#define OFFB_CMSTART offsetof(VexGuestRISCV64State, guest_CMSTART) +#define OFFB_CMLEN offsetof(VexGuestRISCV64State, guest_CMLEN) +#define OFFB_NRADDR offsetof(VexGuestRISCV64State, guest_NRADDR) + +#define OFFB_LLSC_SIZE offsetof(VexGuestRISCV64State, guest_LLSC_SIZE) +#define OFFB_LLSC_ADDR offsetof(VexGuestRISCV64State, guest_LLSC_ADDR) +#define OFFB_LLSC_DATA offsetof(VexGuestRISCV64State, guest_LLSC_DATA) + +/*------------------------------------------------------------*/ +/*--- Integer registers ---*/ +/*------------------------------------------------------------*/ + +static Int offsetIReg64(UInt iregNo) +{ + switch (iregNo) { + case 0: + return OFFB_X0; + case 1: + return OFFB_X1; + case 2: + return OFFB_X2; + case 3: + return OFFB_X3; + case 4: + return OFFB_X4; + case 5: + return OFFB_X5; + case 6: + return OFFB_X6; + case 7: + return OFFB_X7; + case 8: + return OFFB_X8; + case 9: + return OFFB_X9; + case 10: + return OFFB_X10; + case 11: + return OFFB_X11; + case 12: + return OFFB_X12; + case 13: + return OFFB_X13; + case 14: + return OFFB_X14; + case 15: + return OFFB_X15; + case 16: + return OFFB_X16; + case 17: + return OFFB_X17; + case 18: + return OFFB_X18; + case 19: + return OFFB_X19; + case 20: + return OFFB_X20; + case 21: + return OFFB_X21; + case 22: + return OFFB_X22; + case 23: + return OFFB_X23; + case 24: + return OFFB_X24; + case 25: + return OFFB_X25; + case 26: + return OFFB_X26; + case 27: + return OFFB_X27; + case 28: + return OFFB_X28; + case 29: + return OFFB_X29; + case 30: + return OFFB_X30; + case 31: + return OFFB_X31; + default: + vassert(0); + } +} + +/* Obtain ABI name of a register. */ +static const HChar* nameIReg(UInt iregNo) +{ + vassert(iregNo < 32); + static const HChar* names[32] = { + "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", + "a1", "a2", "a3", "a4", "a5", "a6", "a7", "s2", "s3", "s4", "s5", + "s6", "s7", "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6"}; + return names[iregNo]; +} + +/* Read a 64-bit value from a guest integer register. 
*/ +static IRExpr* getIReg64(UInt iregNo) +{ + vassert(iregNo < 32); + return IRExpr_Get(offsetIReg64(iregNo), Ity_I64); +} + +/* Write a 64-bit value into a guest integer register. */ +static void putIReg64(/*OUT*/ IRSB* irsb, UInt iregNo, /*IN*/ IRExpr* e) +{ + vassert(iregNo > 0 && iregNo < 32); + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); + stmt(irsb, IRStmt_Put(offsetIReg64(iregNo), e)); +} + +/* Read a 32-bit value from a guest integer register. */ +static IRExpr* getIReg32(UInt iregNo) +{ + vassert(iregNo < 32); + return unop(Iop_64to32, IRExpr_Get(offsetIReg64(iregNo), Ity_I64)); +} + +/* Write a 32-bit value into a guest integer register. */ +static void putIReg32(/*OUT*/ IRSB* irsb, UInt iregNo, /*IN*/ IRExpr* e) +{ + vassert(iregNo > 0 && iregNo < 32); + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); + stmt(irsb, IRStmt_Put(offsetIReg64(iregNo), unop(Iop_32Sto64, e))); +} + +/* Write an address into the guest pc. */ +static void putPC(/*OUT*/ IRSB* irsb, /*IN*/ IRExpr* e) +{ + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64); + stmt(irsb, IRStmt_Put(OFFB_PC, e)); +} + +/*------------------------------------------------------------*/ +/*--- Floating-point registers ---*/ +/*------------------------------------------------------------*/ + +static Int offsetFReg(UInt fregNo) +{ + switch (fregNo) { + case 0: + return OFFB_F0; + case 1: + return OFFB_F1; + case 2: + return OFFB_F2; + case 3: + return OFFB_F3; + case 4: + return OFFB_F4; + case 5: + return OFFB_F5; + case 6: + return OFFB_F6; + case 7: + return OFFB_F7; + case 8: + return OFFB_F8; + case 9: + return OFFB_F9; + case 10: + return OFFB_F10; + case 11: + return OFFB_F11; + case 12: + return OFFB_F12; + case 13: + return OFFB_F13; + case 14: + return OFFB_F14; + case 15: + return OFFB_F15; + case 16: + return OFFB_F16; + case 17: + return OFFB_F17; + case 18: + return OFFB_F18; + case 19: + return OFFB_F19; + case 20: + return OFFB_F20; + case 21: + return OFFB_F21; + case 22: + return OFFB_F22; + case 23: + return OFFB_F23; + case 24: + return OFFB_F24; + case 25: + return OFFB_F25; + case 26: + return OFFB_F26; + case 27: + return OFFB_F27; + case 28: + return OFFB_F28; + case 29: + return OFFB_F29; + case 30: + return OFFB_F30; + case 31: + return OFFB_F31; + default: + vassert(0); + } +} + +/* Obtain ABI name of a register. */ +static const HChar* nameFReg(UInt fregNo) +{ + vassert(fregNo < 32); + static const HChar* names[32] = { + "ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7", + "fs0", "fs1", "fa0", "fa1", "fa2", "fa3", "fa4", "fa5", + "fa6", "fa7", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7", + "fs8", "fs9", "fs10", "fs11", "ft8", "ft9", "ft10", "ft11"}; + return names[fregNo]; +} + +/* Read a 64-bit value from a guest floating-point register. */ +static IRExpr* getFReg64(UInt fregNo) +{ + vassert(fregNo < 32); + return IRExpr_Get(offsetFReg(fregNo), Ity_F64); +} + +/* Write a 64-bit value into a guest floating-point register. */ +static void putFReg64(/*OUT*/ IRSB* irsb, UInt fregNo, /*IN*/ IRExpr* e) +{ + vassert(fregNo < 32); + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64); + stmt(irsb, IRStmt_Put(offsetFReg(fregNo), e)); +} + +/* Read a 32-bit value from a guest floating-point register. */ +static IRExpr* getFReg32(UInt fregNo) +{ + vassert(fregNo < 32); + /* Note that the following access depends on the host being little-endian + which is checked in disInstr_RISCV64(). */ + /* TODO Check that the value is correctly NaN-boxed. 
If not then return + the 32-bit canonical qNaN, as mandated by the RISC-V ISA. */ + return IRExpr_Get(offsetFReg(fregNo), Ity_F32); +} + +/* Write a 32-bit value into a guest floating-point register. */ +static void putFReg32(/*OUT*/ IRSB* irsb, UInt fregNo, /*IN*/ IRExpr* e) +{ + vassert(fregNo < 32); + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32); + /* Note that the following access depends on the host being little-endian + which is checked in disInstr_RISCV64(). */ + Int offset = offsetFReg(fregNo); + stmt(irsb, IRStmt_Put(offset, e)); + /* Write 1's in the upper bits of the target 64-bit register to create + a NaN-boxed value, as mandated by the RISC-V ISA. */ + stmt(irsb, IRStmt_Put(offset + 4, mkU32(0xffffffff))); + /* TODO Check that this works with Memcheck. */ +} + +/* Read a 32-bit value from the fcsr. */ +static IRExpr* getFCSR(void) { return IRExpr_Get(OFFB_FCSR, Ity_I32); } + +/* Write a 32-bit value into the fcsr. */ +static void putFCSR(/*OUT*/ IRSB* irsb, /*IN*/ IRExpr* e) +{ + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); + stmt(irsb, IRStmt_Put(OFFB_FCSR, e)); +} + +/* Accumulate exception flags in fcsr. */ +static void accumulateFFLAGS(/*OUT*/ IRSB* irsb, /*IN*/ IRExpr* e) +{ + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32); + putFCSR(irsb, binop(Iop_Or32, getFCSR(), binop(Iop_And32, e, mkU32(0x1f)))); +} + +/* Generate IR to get hold of the rounding mode in both RISC-V and IR + formats. A floating-point operation can use either a static rounding mode + encoded in the instruction, or a dynamic rounding mode held in fcsr. Bind the + final result to the passed temporaries (which are allocated by the function). + */ +static void mk_get_rounding_mode(/*MOD*/ IRSB* irsb, + /*OUT*/ IRTemp* rm_RISCV, + /*OUT*/ IRTemp* rm_IR, + UInt inst_rm_RISCV) +{ + /* + rounding mode | RISC-V | IR + -------------------------------------------- + to nearest, ties to even | 000 | 0000 + to zero | 001 | 0011 + to +infinity | 010 | 0010 + to -infinity | 011 | 0001 + to nearest, ties away from 0 | 100 | 0100 + invalid | 101 | 1000 + invalid | 110 | 1000 + dynamic | 111 | 1000 + + The 'dynamic' value selects the mode from fcsr. Its value is valid when + encoded in the instruction but naturally invalid when found in fcsr. + + Static mode is known at the decode time and can be directly expressed by + a respective rounding mode IR constant. + + Dynamic mode requires a runtime mapping from the RISC-V to the IR mode. 
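+   For example, a dynamic rounding mode of 0b001 (to zero) read from fcsr must
+   come out as the IR constant 0b0011 (Irrm_ZERO): with the bit trick below,
+   (1 - 20) >> 2 = 0x3ffffffb in unsigned 32-bit arithmetic, (1 + 3) ^ 3 = 7,
+   and 0x3ffffffb & 7 = 0b0011. The reserved encodings 0b101 and 0b110 come
+   out as 0b1000 (Irrm_INVALID).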
+ It can be implemented using the following transformation: + t0 = fcsr_rm_RISCV - 20 + t1 = t0 >> 2 + t2 = fcsr_rm_RISCV + 3 + t3 = t2 ^ 3 + rm_IR = t1 & t3 + */ + *rm_RISCV = newTemp(irsb, Ity_I32); + *rm_IR = newTemp(irsb, Ity_I32); + switch (inst_rm_RISCV) { + case 0b000: + assign(irsb, *rm_RISCV, mkU32(0)); + assign(irsb, *rm_IR, mkU32(Irrm_NEAREST)); + break; + case 0b001: + assign(irsb, *rm_RISCV, mkU32(1)); + assign(irsb, *rm_IR, mkU32(Irrm_ZERO)); + break; + case 0b010: + assign(irsb, *rm_RISCV, mkU32(2)); + assign(irsb, *rm_IR, mkU32(Irrm_PosINF)); + break; + case 0b011: + assign(irsb, *rm_RISCV, mkU32(3)); + assign(irsb, *rm_IR, mkU32(Irrm_NegINF)); + break; + case 0b100: + assign(irsb, *rm_RISCV, mkU32(4)); + assign(irsb, *rm_IR, mkU32(Irrm_NEAREST_TIE_AWAY_0)); + break; + case 0b101: + assign(irsb, *rm_RISCV, mkU32(5)); + assign(irsb, *rm_IR, mkU32(Irrm_INVALID)); + break; + case 0b110: + assign(irsb, *rm_RISCV, mkU32(6)); + assign(irsb, *rm_IR, mkU32(Irrm_INVALID)); + break; + case 0b111: { + assign(irsb, *rm_RISCV, + binop(Iop_And32, binop(Iop_Shr32, getFCSR(), mkU8(5)), mkU32(7))); + IRTemp t0 = newTemp(irsb, Ity_I32); + assign(irsb, t0, binop(Iop_Sub32, mkexpr(*rm_RISCV), mkU32(20))); + IRTemp t1 = newTemp(irsb, Ity_I32); + assign(irsb, t1, binop(Iop_Shr32, mkexpr(t0), mkU8(2))); + IRTemp t2 = newTemp(irsb, Ity_I32); + assign(irsb, t2, binop(Iop_Add32, mkexpr(*rm_RISCV), mkU32(3))); + IRTemp t3 = newTemp(irsb, Ity_I32); + assign(irsb, t3, binop(Iop_Xor32, mkexpr(t2), mkU32(3))); + assign(irsb, *rm_IR, binop(Iop_And32, mkexpr(t1), mkexpr(t3))); + break; + } + default: + vassert(0); + } +} + +/*------------------------------------------------------------*/ +/*--- Name helpers ---*/ +/*------------------------------------------------------------*/ + +/* Obtain an acquire/release atomic-instruction suffix. */ +static const HChar* nameAqRlSuffix(UInt aqrl) +{ + switch (aqrl) { + case 0b00: + return ""; + case 0b01: + return ".rl"; + case 0b10: + return ".aq"; + case 0b11: + return ".aqrl"; + default: + vpanic("nameAqRlSuffix(riscv64)"); + } +} + +/* Obtain a control/status register name. */ +static const HChar* nameCSR(UInt csr) +{ + switch (csr) { + case 0x001: + return "fflags"; + case 0x002: + return "frm"; + case 0x003: + return "fcsr"; + default: + vpanic("nameCSR(riscv64)"); + } +} + +/* Obtain a floating-point rounding-mode operand string. */ +static const HChar* nameRMOperand(UInt rm) +{ + switch (rm) { + case 0b000: + return ", rne"; + case 0b001: + return ", rtz"; + case 0b010: + return ", rdn"; + case 0b011: + return ", rup"; + case 0b100: + return ", rmm"; + case 0b101: + return ", "; + case 0b110: + return ", "; + case 0b111: + return ""; /* dyn */ + default: + vpanic("nameRMOperand(riscv64)"); + } +} + +/*------------------------------------------------------------*/ +/*--- Disassemble a single instruction ---*/ +/*------------------------------------------------------------*/ + +/* A macro to fish bits out of 'insn' which is a local variable to all + disassembly functions. 
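+   For example, with the standard 32-bit encodings handled below, INSN(6, 0)
+   extracts the major opcode and INSN(11, 7) the rd field, so
+   INSN(6, 0) == 0b0110111 recognises LUI.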
*/ +#define INSN(_bMax, _bMin) SLICE_UInt(insn, (_bMax), (_bMin)) + +static Bool dis_RV64C(/*MB_OUT*/ DisResult* dres, + /*OUT*/ IRSB* irsb, + UInt insn, + Addr guest_pc_curr_instr, + Bool sigill_diag) +{ + vassert(INSN(1, 0) == 0b00 || INSN(1, 0) == 0b01 || INSN(1, 0) == 0b10); + + /* ---- RV64C compressed instruction set, quadrant 0 ----- */ + + /* ------------- c.addi4spn rd, nzuimm[9:2] -------------- */ + if (INSN(1, 0) == 0b00 && INSN(15, 13) == 0b000) { + UInt rd = INSN(4, 2) + 8; + UInt nzuimm9_2 = + INSN(10, 7) << 4 | INSN(12, 11) << 2 | INSN(5, 5) << 1 | INSN(6, 6); + if (nzuimm9_2 == 0) { + /* Invalid C.ADDI4SPN, fall through. */ + } else { + ULong uimm = nzuimm9_2 << 2; + putIReg64(irsb, rd, + binop(Iop_Add64, getIReg64(2 /*x2/sp*/), mkU64(uimm))); + DIP("c.addi4spn %s, %llu\n", nameIReg(rd), uimm); + return True; + } + } + + /* -------------- c.fld rd, uimm[7:3](rs1) --------------- */ + if (INSN(1, 0) == 0b00 && INSN(15, 13) == 0b001) { + UInt rd = INSN(4, 2) + 8; + UInt rs1 = INSN(9, 7) + 8; + UInt uimm7_3 = INSN(6, 5) << 3 | INSN(12, 10); + ULong uimm = uimm7_3 << 3; + putFReg64(irsb, rd, + loadLE(Ity_F64, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm)))); + DIP("c.fld %s, %llu(%s)\n", nameFReg(rd), uimm, nameIReg(rs1)); + return True; + } + + /* --------------- c.lw rd, uimm[6:2](rs1) --------------- */ + if (INSN(1, 0) == 0b00 && INSN(15, 13) == 0b010) { + UInt rd = INSN(4, 2) + 8; + UInt rs1 = INSN(9, 7) + 8; + UInt uimm6_2 = INSN(5, 5) << 4 | INSN(12, 10) << 1 | INSN(6, 6); + ULong uimm = uimm6_2 << 2; + putIReg64( + irsb, rd, + unop(Iop_32Sto64, + loadLE(Ity_I32, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm))))); + DIP("c.lw %s, %llu(%s)\n", nameIReg(rd), uimm, nameIReg(rs1)); + return True; + } + + /* --------------- c.ld rd, uimm[7:3](rs1) --------------- */ + if (INSN(1, 0) == 0b00 && INSN(15, 13) == 0b011) { + UInt rd = INSN(4, 2) + 8; + UInt rs1 = INSN(9, 7) + 8; + UInt uimm7_3 = INSN(6, 5) << 3 | INSN(12, 10); + ULong uimm = uimm7_3 << 3; + putIReg64(irsb, rd, + loadLE(Ity_I64, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm)))); + DIP("c.ld %s, %llu(%s)\n", nameIReg(rd), uimm, nameIReg(rs1)); + return True; + } + + /* -------------- c.fsd rs2, uimm[7:3](rs1) -------------- */ + if (INSN(1, 0) == 0b00 && INSN(15, 13) == 0b101) { + UInt rs1 = INSN(9, 7) + 8; + UInt rs2 = INSN(4, 2) + 8; + UInt uimm7_3 = INSN(6, 5) << 3 | INSN(12, 10); + ULong uimm = uimm7_3 << 3; + storeLE(irsb, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm)), + getFReg64(rs2)); + DIP("c.fsd %s, %llu(%s)\n", nameFReg(rs2), uimm, nameIReg(rs1)); + return True; + } + + /* -------------- c.sw rs2, uimm[6:2](rs1) --------------- */ + if (INSN(1, 0) == 0b00 && INSN(15, 13) == 0b110) { + UInt rs1 = INSN(9, 7) + 8; + UInt rs2 = INSN(4, 2) + 8; + UInt uimm6_2 = INSN(5, 5) << 4 | INSN(12, 10) << 1 | INSN(6, 6); + ULong uimm = uimm6_2 << 2; + storeLE(irsb, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm)), + unop(Iop_64to32, getIReg64(rs2))); + DIP("c.sw %s, %llu(%s)\n", nameIReg(rs2), uimm, nameIReg(rs1)); + return True; + } + + /* -------------- c.sd rs2, uimm[7:3](rs1) --------------- */ + if (INSN(1, 0) == 0b00 && INSN(15, 13) == 0b111) { + UInt rs1 = INSN(9, 7) + 8; + UInt rs2 = INSN(4, 2) + 8; + UInt uimm7_3 = INSN(6, 5) << 3 | INSN(12, 10); + ULong uimm = uimm7_3 << 3; + storeLE(irsb, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm)), + getIReg64(rs2)); + DIP("c.sd %s, %llu(%s)\n", nameIReg(rs2), uimm, nameIReg(rs1)); + return True; + } + + /* ---- RV64C compressed instruction set, quadrant 1 ----- */ + + /* 
------------------------ c.nop ------------------------ */ + if (INSN(15, 0) == 0b0000000000000001) { + DIP("c.nop\n"); + return True; + } + + /* -------------- c.addi rd_rs1, nzimm[5:0] -------------- */ + if (INSN(1, 0) == 0b01 && INSN(15, 13) == 0b000) { + UInt rd_rs1 = INSN(11, 7); + UInt nzimm5_0 = INSN(12, 12) << 5 | INSN(6, 2); + if (rd_rs1 == 0 || nzimm5_0 == 0) { + /* Invalid C.ADDI, fall through. */ + } else { + ULong simm = vex_sx_to_64(nzimm5_0, 6); + putIReg64(irsb, rd_rs1, + binop(Iop_Add64, getIReg64(rd_rs1), mkU64(simm))); + DIP("c.addi %s, %lld\n", nameIReg(rd_rs1), (Long)simm); + return True; + } + } + + /* -------------- c.addiw rd_rs1, imm[5:0] --------------- */ + if (INSN(1, 0) == 0b01 && INSN(15, 13) == 0b001) { + UInt rd_rs1 = INSN(11, 7); + UInt imm5_0 = INSN(12, 12) << 5 | INSN(6, 2); + if (rd_rs1 == 0) { + /* Invalid C.ADDIW, fall through. */ + } else { + UInt simm = (UInt)vex_sx_to_64(imm5_0, 6); + putIReg32(irsb, rd_rs1, + binop(Iop_Add32, getIReg32(rd_rs1), mkU32(simm))); + DIP("c.addiw %s, %d\n", nameIReg(rd_rs1), (Int)simm); + return True; + } + } + + /* ------------------ c.li rd, imm[5:0] ------------------ */ + if (INSN(1, 0) == 0b01 && INSN(15, 13) == 0b010) { + UInt rd = INSN(11, 7); + UInt imm5_0 = INSN(12, 12) << 5 | INSN(6, 2); + if (rd == 0) { + /* Invalid C.LI, fall through. */ + } else { + ULong simm = vex_sx_to_64(imm5_0, 6); + putIReg64(irsb, rd, mkU64(simm)); + DIP("c.li %s, %lld\n", nameIReg(rd), (Long)simm); + return True; + } + } + + /* ---------------- c.addi16sp nzimm[9:4] ---------------- */ + if (INSN(1, 0) == 0b01 && INSN(15, 13) == 0b011) { + UInt rd_rs1 = INSN(11, 7); + UInt nzimm9_4 = INSN(12, 12) << 5 | INSN(4, 3) << 3 | INSN(5, 5) << 2 | + INSN(2, 2) << 1 | INSN(6, 6); + if (rd_rs1 != 2 || nzimm9_4 == 0) { + /* Invalid C.ADDI16SP, fall through. */ + } else { + ULong simm = vex_sx_to_64(nzimm9_4 << 4, 10); + putIReg64(irsb, rd_rs1, + binop(Iop_Add64, getIReg64(rd_rs1), mkU64(simm))); + DIP("c.addi16sp %lld\n", (Long)simm); + return True; + } + } + + /* --------------- c.lui rd, nzimm[17:12] ---------------- */ + if (INSN(1, 0) == 0b01 && INSN(15, 13) == 0b011) { + UInt rd = INSN(11, 7); + UInt nzimm17_12 = INSN(12, 12) << 5 | INSN(6, 2); + if (rd == 0 || rd == 2 || nzimm17_12 == 0) { + /* Invalid C.LUI, fall through. */ + } else { + putIReg64(irsb, rd, mkU64(vex_sx_to_64(nzimm17_12 << 12, 18))); + DIP("c.lui %s, 0x%x\n", nameIReg(rd), nzimm17_12); + return True; + } + } + + /* ---------- c.{srli,srai} rd_rs1, nzuimm[5:0] ---------- */ + if (INSN(1, 0) == 0b01 && INSN(11, 11) == 0b0 && INSN(15, 13) == 0b100) { + Bool is_log = INSN(10, 10) == 0b0; + UInt rd_rs1 = INSN(9, 7) + 8; + UInt nzuimm5_0 = INSN(12, 12) << 5 | INSN(6, 2); + if (nzuimm5_0 == 0) { + /* Invalid C.{SRLI,SRAI}, fall through. */ + } else { + putIReg64(irsb, rd_rs1, + binop(is_log ? Iop_Shr64 : Iop_Sar64, getIReg64(rd_rs1), + mkU8(nzuimm5_0))); + DIP("c.%s %s, %u\n", is_log ? "srli" : "srai", nameIReg(rd_rs1), + nzuimm5_0); + return True; + } + } + + /* --------------- c.andi rd_rs1, imm[5:0] --------------- */ + if (INSN(1, 0) == 0b01 && INSN(11, 10) == 0b10 && INSN(15, 13) == 0b100) { + UInt rd_rs1 = INSN(9, 7) + 8; + UInt imm5_0 = INSN(12, 12) << 5 | INSN(6, 2); + if (rd_rs1 == 0) { + /* Invalid C.ANDI, fall through. 
*/ + } else { + ULong simm = vex_sx_to_64(imm5_0, 6); + putIReg64(irsb, rd_rs1, + binop(Iop_And64, getIReg64(rd_rs1), mkU64(simm))); + DIP("c.andi %s, 0x%llx\n", nameIReg(rd_rs1), simm); + return True; + } + } + + /* ----------- c.{sub,xor,or,and} rd_rs1, rs2 ----------- */ + if (INSN(1, 0) == 0b01 && INSN(15, 10) == 0b100011) { + UInt funct2 = INSN(6, 5); + UInt rd_rs1 = INSN(9, 7) + 8; + UInt rs2 = INSN(4, 2) + 8; + const HChar* name; + IROp op; + switch (funct2) { + case 0b00: + name = "sub"; + op = Iop_Sub64; + break; + case 0b01: + name = "xor"; + op = Iop_Xor64; + break; + case 0b10: + name = "or"; + op = Iop_Or64; + break; + case 0b11: + name = "and"; + op = Iop_And64; + break; + default: + vassert(0); + } + putIReg64(irsb, rd_rs1, binop(op, getIReg64(rd_rs1), getIReg64(rs2))); + DIP("c.%s %s, %s\n", name, nameIReg(rd_rs1), nameIReg(rs2)); + return True; + } + + /* -------------- c.{subw,addw} rd_rs1, rs2 -------------- */ + if (INSN(1, 0) == 0b01 && INSN(6, 6) == 0b0 && INSN(15, 10) == 0b100111) { + Bool is_sub = INSN(5, 5) == 0b0; + UInt rd_rs1 = INSN(9, 7) + 8; + UInt rs2 = INSN(4, 2) + 8; + putIReg32(irsb, rd_rs1, + binop(is_sub ? Iop_Sub32 : Iop_Add32, getIReg32(rd_rs1), + getIReg32(rs2))); + DIP("c.%s %s, %s\n", is_sub ? "subw" : "addw", nameIReg(rd_rs1), + nameIReg(rs2)); + return True; + } + + /* -------------------- c.j imm[11:1] -------------------- */ + if (INSN(1, 0) == 0b01 && INSN(15, 13) == 0b101) { + UInt imm11_1 = INSN(12, 12) << 10 | INSN(8, 8) << 9 | INSN(10, 9) << 7 | + INSN(6, 6) << 6 | INSN(7, 7) << 5 | INSN(2, 2) << 4 | + INSN(11, 11) << 3 | INSN(5, 3); + ULong simm = vex_sx_to_64(imm11_1 << 1, 12); + ULong dst_pc = guest_pc_curr_instr + simm; + putPC(irsb, mkU64(dst_pc)); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Boring; + DIP("c.j 0x%llx\n", dst_pc); + return True; + } + + /* ------------- c.{beqz,bnez} rs1, imm[8:1] ------------- */ + if (INSN(1, 0) == 0b01 && INSN(15, 14) == 0b11) { + Bool is_eq = INSN(13, 13) == 0b0; + UInt rs1 = INSN(9, 7) + 8; + UInt imm8_1 = INSN(12, 12) << 7 | INSN(6, 5) << 5 | INSN(2, 2) << 4 | + INSN(11, 10) << 2 | INSN(4, 3); + ULong simm = vex_sx_to_64(imm8_1 << 1, 9); + ULong dst_pc = guest_pc_curr_instr + simm; + stmt(irsb, IRStmt_Exit(binop(is_eq ? Iop_CmpEQ64 : Iop_CmpNE64, + getIReg64(rs1), mkU64(0)), + Ijk_Boring, IRConst_U64(dst_pc), OFFB_PC)); + putPC(irsb, mkU64(guest_pc_curr_instr + 2)); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Boring; + DIP("c.%s %s, 0x%llx\n", is_eq ? "beqz" : "bnez", nameIReg(rs1), dst_pc); + return True; + } + + /* ---- RV64C compressed instruction set, quadrant 2 ----- */ + + /* ------------- c.slli rd_rs1, nzuimm[5:0] -------------- */ + if (INSN(1, 0) == 0b10 && INSN(15, 13) == 0b000) { + UInt rd_rs1 = INSN(11, 7); + UInt nzuimm5_0 = INSN(12, 12) << 5 | INSN(6, 2); + if (rd_rs1 == 0 || nzuimm5_0 == 0) { + /* Invalid C.SLLI, fall through. 
*/ + } else { + putIReg64(irsb, rd_rs1, + binop(Iop_Shl64, getIReg64(rd_rs1), mkU8(nzuimm5_0))); + DIP("c.slli %s, %u\n", nameIReg(rd_rs1), nzuimm5_0); + return True; + } + } + + /* -------------- c.fldsp rd, uimm[8:3](x2) -------------- */ + if (INSN(1, 0) == 0b10 && INSN(15, 13) == 0b001) { + UInt rd = INSN(11, 7); + UInt rs1 = 2; /* base=x2/sp */ + UInt uimm8_3 = INSN(4, 2) << 3 | INSN(12, 12) << 2 | INSN(6, 5); + ULong uimm = uimm8_3 << 3; + putFReg64(irsb, rd, + loadLE(Ity_F64, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm)))); + DIP("c.fldsp %s, %llu(%s)\n", nameFReg(rd), uimm, nameIReg(rs1)); + return True; + } + + /* -------------- c.lwsp rd, uimm[7:2](x2) --------------- */ + if (INSN(1, 0) == 0b10 && INSN(15, 13) == 0b010) { + UInt rd = INSN(11, 7); + UInt rs1 = 2; /* base=x2/sp */ + UInt uimm7_2 = INSN(3, 2) << 4 | INSN(12, 12) << 3 | INSN(6, 4); + if (rd == 0) { + /* Invalid C.LWSP, fall through. */ + } else { + ULong uimm = uimm7_2 << 2; + putIReg64(irsb, rd, + unop(Iop_32Sto64, + loadLE(Ity_I32, + binop(Iop_Add64, getIReg64(rs1), mkU64(uimm))))); + DIP("c.lwsp %s, %llu(%s)\n", nameIReg(rd), uimm, nameIReg(rs1)); + return True; + } + } + + /* -------------- c.ldsp rd, uimm[8:3](x2) --------------- */ + if (INSN(1, 0) == 0b10 && INSN(15, 13) == 0b011) { + UInt rd = INSN(11, 7); + UInt rs1 = 2; /* base=x2/sp */ + UInt uimm8_3 = INSN(4, 2) << 3 | INSN(12, 12) << 2 | INSN(6, 5); + if (rd == 0) { + /* Invalid C.LDSP, fall through. */ + } else { + ULong uimm = uimm8_3 << 3; + putIReg64( + irsb, rd, + loadLE(Ity_I64, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm)))); + DIP("c.ldsp %s, %llu(%s)\n", nameIReg(rd), uimm, nameIReg(rs1)); + return True; + } + } + + /* ---------------------- c.jr rs1 ----------------------- */ + if (INSN(1, 0) == 0b10 && INSN(15, 12) == 0b1000) { + UInt rs1 = INSN(11, 7); + UInt rs2 = INSN(6, 2); + if (rs1 == 0 || rs2 != 0) { + /* Invalid C.JR, fall through. */ + } else { + putPC(irsb, getIReg64(rs1)); + dres->whatNext = Dis_StopHere; + if (rs1 == 1 /*x1/ra*/) { + dres->jk_StopHere = Ijk_Ret; + DIP("c.ret\n"); + } else { + dres->jk_StopHere = Ijk_Boring; + DIP("c.jr %s\n", nameIReg(rs1)); + } + return True; + } + } + + /* -------------------- c.mv rd, rs2 --------------------- */ + if (INSN(1, 0) == 0b10 && INSN(15, 12) == 0b1000) { + UInt rd = INSN(11, 7); + UInt rs2 = INSN(6, 2); + if (rd == 0 || rs2 == 0) { + /* Invalid C.MV, fall through. */ + } else { + putIReg64(irsb, rd, getIReg64(rs2)); + DIP("c.mv %s, %s\n", nameIReg(rd), nameIReg(rs2)); + return True; + } + } + + /* --------------------- c.ebreak ------------------------ */ + if (INSN(15, 0) == 0b1001000000000010) { + putPC(irsb, mkU64(guest_pc_curr_instr + 2)); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_SigTRAP; + DIP("c.ebreak\n"); + return True; + } + + /* --------------------- c.jalr rs1 ---------------------- */ + if (INSN(1, 0) == 0b10 && INSN(15, 12) == 0b1001) { + UInt rs1 = INSN(11, 7); + UInt rs2 = INSN(6, 2); + if (rs1 == 0 || rs2 != 0) { + /* Invalid C.JALR, fall through. */ + } else { + putIReg64(irsb, 1 /*x1/ra*/, mkU64(guest_pc_curr_instr + 2)); + putPC(irsb, getIReg64(rs1)); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Call; + DIP("c.jalr %s\n", nameIReg(rs1)); + return True; + } + } + + /* ------------------ c.add rd_rs1, rs2 ------------------ */ + if (INSN(1, 0) == 0b10 && INSN(15, 12) == 0b1001) { + UInt rd_rs1 = INSN(11, 7); + UInt rs2 = INSN(6, 2); + if (rd_rs1 == 0 || rs2 == 0) { + /* Invalid C.ADD, fall through. 
*/ + } else { + putIReg64(irsb, rd_rs1, + binop(Iop_Add64, getIReg64(rd_rs1), getIReg64(rs2))); + DIP("c.add %s, %s\n", nameIReg(rd_rs1), nameIReg(rs2)); + return True; + } + } + + /* ------------- c.fsdsp rs2, uimm[8:3](x2) -------------- */ + if (INSN(1, 0) == 0b10 && INSN(15, 13) == 0b101) { + UInt rs1 = 2; /* base=x2/sp */ + UInt rs2 = INSN(6, 2); + UInt uimm8_3 = INSN(9, 7) << 3 | INSN(12, 10); + ULong uimm = uimm8_3 << 3; + storeLE(irsb, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm)), + getFReg64(rs2)); + DIP("c.fsdsp %s, %llu(%s)\n", nameFReg(rs2), uimm, nameIReg(rs1)); + return True; + } + + /* -------------- c.swsp rs2, uimm[7:2](x2) -------------- */ + if (INSN(1, 0) == 0b10 && INSN(15, 13) == 0b110) { + UInt rs1 = 2; /* base=x2/sp */ + UInt rs2 = INSN(6, 2); + UInt uimm7_2 = INSN(8, 7) << 4 | INSN(12, 9); + ULong uimm = uimm7_2 << 2; + storeLE(irsb, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm)), + unop(Iop_64to32, getIReg64(rs2))); + DIP("c.swsp %s, %llu(%s)\n", nameIReg(rs2), uimm, nameIReg(rs1)); + return True; + } + + /* -------------- c.sdsp rs2, uimm[8:3](x2) -------------- */ + if (INSN(1, 0) == 0b10 && INSN(15, 13) == 0b111) { + UInt rs1 = 2; /* base=x2/sp */ + UInt rs2 = INSN(6, 2); + UInt uimm8_3 = INSN(9, 7) << 3 | INSN(12, 10); + ULong uimm = uimm8_3 << 3; + storeLE(irsb, binop(Iop_Add64, getIReg64(rs1), mkU64(uimm)), + getIReg64(rs2)); + DIP("c.sdsp %s, %llu(%s)\n", nameIReg(rs2), uimm, nameIReg(rs1)); + return True; + } + + if (sigill_diag) + vex_printf("RISCV64 front end: compressed\n"); + return False; +} + +static Bool dis_RV64I(/*MB_OUT*/ DisResult* dres, + /*OUT*/ IRSB* irsb, + UInt insn, + Addr guest_pc_curr_instr) +{ + /* ------------- RV64I base instruction set -------------- */ + + /* ----------------- lui rd, imm[31:12] ------------------ */ + if (INSN(6, 0) == 0b0110111) { + UInt rd = INSN(11, 7); + UInt imm31_12 = INSN(31, 12); + if (rd != 0) + putIReg64(irsb, rd, mkU64(vex_sx_to_64(imm31_12 << 12, 32))); + DIP("lui %s, 0x%x\n", nameIReg(rd), imm31_12); + return True; + } + + /* ---------------- auipc rd, imm[31:12] ----------------- */ + if (INSN(6, 0) == 0b0010111) { + UInt rd = INSN(11, 7); + UInt imm31_12 = INSN(31, 12); + if (rd != 0) + putIReg64( + irsb, rd, + mkU64(guest_pc_curr_instr + vex_sx_to_64(imm31_12 << 12, 32))); + DIP("auipc %s, 0x%x\n", nameIReg(rd), imm31_12); + return True; + } + + /* ------------------ jal rd, imm[20:1] ------------------ */ + if (INSN(6, 0) == 0b1101111) { + UInt rd = INSN(11, 7); + UInt imm20_1 = INSN(31, 31) << 19 | INSN(19, 12) << 11 | + INSN(20, 20) << 10 | INSN(30, 21); + ULong simm = vex_sx_to_64(imm20_1 << 1, 21); + ULong dst_pc = guest_pc_curr_instr + simm; + if (rd != 0) + putIReg64(irsb, rd, mkU64(guest_pc_curr_instr + 4)); + putPC(irsb, mkU64(dst_pc)); + dres->whatNext = Dis_StopHere; + if (rd != 0) { + dres->jk_StopHere = Ijk_Call; + DIP("jal %s, 0x%llx\n", nameIReg(rd), dst_pc); + } else { + dres->jk_StopHere = Ijk_Boring; + DIP("j 0x%llx\n", dst_pc); + } + return True; + } + + /* --------------- jalr rd, imm[11:0](rs1) --------------- */ + if (INSN(6, 0) == 0b1100111 && INSN(14, 12) == 0b000) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt imm11_0 = INSN(31, 20); + ULong simm = vex_sx_to_64(imm11_0, 12); + IRTemp dst_pc = newTemp(irsb, Ity_I64); + assign(irsb, dst_pc, binop(Iop_Add64, getIReg64(rs1), mkU64(simm))); + if (rd != 0) + putIReg64(irsb, rd, mkU64(guest_pc_curr_instr + 4)); + putPC(irsb, mkexpr(dst_pc)); + dres->whatNext = Dis_StopHere; + if (rd == 0) { + if (rs1 == 1 /*x1/ra*/ && 
simm == 0) { + dres->jk_StopHere = Ijk_Ret; + DIP("ret\n"); + } else { + dres->jk_StopHere = Ijk_Boring; + DIP("jr %lld(%s)\n", (Long)simm, nameIReg(rs1)); + } + } else { + dres->jk_StopHere = Ijk_Call; + DIP("jalr %s, %lld(%s)\n", nameIReg(rd), (Long)simm, nameIReg(rs1)); + } + return True; + } + + /* ------------ {beq,bne} rs1, rs2, imm[12:1] ------------ */ + /* ------------ {blt,bge} rs1, rs2, imm[12:1] ------------ */ + /* ----------- {bltu,bgeu} rs1, rs2, imm[12:1] ----------- */ + if (INSN(6, 0) == 0b1100011) { + UInt funct3 = INSN(14, 12); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + UInt imm12_1 = INSN(31, 31) << 11 | INSN(7, 7) << 10 | INSN(30, 25) << 4 | + INSN(11, 8); + if (funct3 == 0b010 || funct3 == 0b011) { + /* Invalid B, fall through. */ + } else { + ULong simm = vex_sx_to_64(imm12_1 << 1, 13); + ULong dst_pc = guest_pc_curr_instr + simm; + const HChar* name; + IRExpr* cond; + switch (funct3) { + case 0b000: + name = "beq"; + cond = binop(Iop_CmpEQ64, getIReg64(rs1), getIReg64(rs2)); + break; + case 0b001: + name = "bne"; + cond = binop(Iop_CmpNE64, getIReg64(rs1), getIReg64(rs2)); + break; + case 0b100: + name = "blt"; + cond = binop(Iop_CmpLT64S, getIReg64(rs1), getIReg64(rs2)); + break; + case 0b101: + name = "bge"; + cond = binop(Iop_CmpLE64S, getIReg64(rs2), getIReg64(rs1)); + break; + case 0b110: + name = "bltu"; + cond = binop(Iop_CmpLT64U, getIReg64(rs1), getIReg64(rs2)); + break; + case 0b111: + name = "bgeu"; + cond = binop(Iop_CmpLE64U, getIReg64(rs2), getIReg64(rs1)); + break; + default: + vassert(0); + } + stmt(irsb, + IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(dst_pc), OFFB_PC)); + putPC(irsb, mkU64(guest_pc_curr_instr + 4)); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Boring; + DIP("%s %s, %s, 0x%llx\n", name, nameIReg(rs1), nameIReg(rs2), dst_pc); + return True; + } + } + + /* ---------- {lb,lh,lw,ld} rd, imm[11:0](rs1) ----------- */ + /* ---------- {lbu,lhu,lwu} rd, imm[11:0](rs1) ----------- */ + if (INSN(6, 0) == 0b0000011) { + UInt funct3 = INSN(14, 12); + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt imm11_0 = INSN(31, 20); + if (funct3 == 0b111) { + /* Invalid L, fall through. 
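+            funct3=0b111 is a reserved load encoding; RV64I has no 64-bit
+            unsigned load, so nothing is translated for it.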
*/ + } else { + ULong simm = vex_sx_to_64(imm11_0, 12); + if (rd != 0) { + IRExpr* ea = binop(Iop_Add64, getIReg64(rs1), mkU64(simm)); + IRExpr* expr; + switch (funct3) { + case 0b000: + expr = unop(Iop_8Sto64, loadLE(Ity_I8, ea)); + break; + case 0b001: + expr = unop(Iop_16Sto64, loadLE(Ity_I16, ea)); + break; + case 0b010: + expr = unop(Iop_32Sto64, loadLE(Ity_I32, ea)); + break; + case 0b011: + expr = loadLE(Ity_I64, ea); + break; + case 0b100: + expr = unop(Iop_8Uto64, loadLE(Ity_I8, ea)); + break; + case 0b101: + expr = unop(Iop_16Uto64, loadLE(Ity_I16, ea)); + break; + case 0b110: + expr = unop(Iop_32Uto64, loadLE(Ity_I32, ea)); + break; + default: + vassert(0); + } + putIReg64(irsb, rd, expr); + } + const HChar* name; + switch (funct3) { + case 0b000: + name = "lb"; + break; + case 0b001: + name = "lh"; + break; + case 0b010: + name = "lw"; + break; + case 0b011: + name = "ld"; + break; + case 0b100: + name = "lbu"; + break; + case 0b101: + name = "lhu"; + break; + case 0b110: + name = "lwu"; + break; + default: + vassert(0); + } + DIP("%s %s, %lld(%s)\n", name, nameIReg(rd), (Long)simm, + nameIReg(rs1)); + return True; + } + } + + /* ---------- {sb,sh,sw,sd} rs2, imm[11:0](rs1) ---------- */ + if (INSN(6, 0) == 0b0100011) { + UInt funct3 = INSN(14, 12); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + UInt imm11_0 = INSN(31, 25) << 5 | INSN(11, 7); + if (funct3 == 0b100 || funct3 == 0b101 || funct3 == 0b110 || + funct3 == 0b111) { + /* Invalid S, fall through. */ + } else { + ULong simm = vex_sx_to_64(imm11_0, 12); + IRExpr* ea = binop(Iop_Add64, getIReg64(rs1), mkU64(simm)); + const HChar* name; + IRExpr* expr; + switch (funct3) { + case 0b000: + name = "sb"; + expr = unop(Iop_64to8, getIReg64(rs2)); + break; + case 0b001: + name = "sh"; + expr = unop(Iop_64to16, getIReg64(rs2)); + break; + case 0b010: + name = "sw"; + expr = unop(Iop_64to32, getIReg64(rs2)); + break; + case 0b011: + name = "sd"; + expr = getIReg64(rs2); + break; + default: + vassert(0); + } + storeLE(irsb, ea, expr); + DIP("%s %s, %lld(%s)\n", name, nameIReg(rs2), (Long)simm, + nameIReg(rs1)); + return True; + } + } + + /* -------- {addi,slti,sltiu} rd, rs1, imm[11:0] --------- */ + /* --------- {xori,ori,andi} rd, rs1, imm[11:0] ---------- */ + if (INSN(6, 0) == 0b0010011) { + UInt funct3 = INSN(14, 12); + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt imm11_0 = INSN(31, 20); + if (funct3 == 0b001 || funct3 == 0b101) { + /* Invalid I, fall through. */ + } else { + ULong simm = vex_sx_to_64(imm11_0, 12); + if (rd != 0) { + IRExpr* expr; + switch (funct3) { + case 0b000: + expr = binop(Iop_Add64, getIReg64(rs1), mkU64(simm)); + break; + case 0b010: + expr = unop(Iop_1Uto64, + binop(Iop_CmpLT64S, getIReg64(rs1), mkU64(simm))); + break; + case 0b011: + /* Note that the comparison itself is unsigned but the immediate + is sign-extended. 
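+               For example, an immediate of -1 makes the comparison
+               rs1 <u 0xFFFF_FFFF_FFFF_FFFF, which is true for every rs1
+               value except all-ones.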
*/ + expr = unop(Iop_1Uto64, + binop(Iop_CmpLT64U, getIReg64(rs1), mkU64(simm))); + break; + case 0b100: + expr = binop(Iop_Xor64, getIReg64(rs1), mkU64(simm)); + break; + case 0b110: + expr = binop(Iop_Or64, getIReg64(rs1), mkU64(simm)); + break; + case 0b111: + expr = binop(Iop_And64, getIReg64(rs1), mkU64(simm)); + break; + default: + vassert(0); + } + putIReg64(irsb, rd, expr); + } + const HChar* name; + switch (funct3) { + case 0b000: + name = "addi"; + break; + case 0b010: + name = "slti"; + break; + case 0b011: + name = "sltiu"; + break; + case 0b100: + name = "xori"; + break; + case 0b110: + name = "ori"; + break; + case 0b111: + name = "andi"; + break; + default: + vassert(0); + } + DIP("%s %s, %s, %lld\n", name, nameIReg(rd), nameIReg(rs1), + (Long)simm); + return True; + } + } + + /* --------------- slli rd, rs1, uimm[5:0] --------------- */ + if (INSN(6, 0) == 0b0010011 && INSN(14, 12) == 0b001 && + INSN(31, 26) == 0b000000) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt uimm5_0 = INSN(25, 20); + if (rd != 0) + putIReg64(irsb, rd, binop(Iop_Shl64, getIReg64(rs1), mkU8(uimm5_0))); + DIP("slli %s, %s, %u\n", nameIReg(rd), nameIReg(rs1), uimm5_0); + return True; + } + + /* ----------- {srli,srai} rd, rs1, uimm[5:0] ----------=- */ + if (INSN(6, 0) == 0b0010011 && INSN(14, 12) == 0b101 && + INSN(29, 26) == 0b0000 && INSN(31, 31) == 0b0) { + Bool is_log = INSN(30, 30) == 0b0; + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt uimm5_0 = INSN(25, 20); + if (rd != 0) + putIReg64(irsb, rd, + binop(is_log ? Iop_Shr64 : Iop_Sar64, getIReg64(rs1), + mkU8(uimm5_0))); + DIP("%s %s, %s, %u\n", is_log ? "srli" : "srai", nameIReg(rd), + nameIReg(rs1), uimm5_0); + return True; + } + + /* --------------- {add,sub} rd, rs1, rs2 ---------------- */ + /* ------------- {sll,srl,sra} rd, rs1, rs2 -------------- */ + /* --------------- {slt,sltu} rd, rs1, rs2 --------------- */ + /* -------------- {xor,or,and} rd, rs1, rs2 -------------- */ + if (INSN(6, 0) == 0b0110011 && INSN(29, 25) == 0b00000 && + INSN(31, 31) == 0b0) { + UInt funct3 = INSN(14, 12); + Bool is_base = INSN(30, 30) == 0b0; + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + if (!is_base && funct3 != 0b000 && funct3 != 0b101) { + /* Invalid , fall through. */ + } else { + if (rd != 0) { + IRExpr* expr; + switch (funct3) { + case 0b000: /* sll */ + expr = binop(is_base ? Iop_Add64 : Iop_Sub64, getIReg64(rs1), + getIReg64(rs2)); + break; + case 0b001: + expr = binop(Iop_Shl64, getIReg64(rs1), + unop(Iop_64to8, getIReg64(rs2))); + break; + case 0b010: + expr = unop(Iop_1Uto64, + binop(Iop_CmpLT64S, getIReg64(rs1), getIReg64(rs2))); + break; + case 0b011: + expr = unop(Iop_1Uto64, + binop(Iop_CmpLT64U, getIReg64(rs1), getIReg64(rs2))); + break; + case 0b100: + expr = binop(Iop_Xor64, getIReg64(rs1), getIReg64(rs2)); + break; + case 0b101: + expr = binop(is_base ? Iop_Shr64 : Iop_Sar64, getIReg64(rs1), + unop(Iop_64to8, getIReg64(rs2))); + break; + case 0b110: + expr = binop(Iop_Or64, getIReg64(rs1), getIReg64(rs2)); + break; + case 0b111: + expr = binop(Iop_And64, getIReg64(rs1), getIReg64(rs2)); + break; + default: + vassert(0); + } + putIReg64(irsb, rd, expr); + } + const HChar* name; + switch (funct3) { + case 0b000: + name = is_base ? "add" : "sub"; + break; + case 0b001: + name = "sll"; + break; + case 0b010: + name = "slt"; + break; + case 0b011: + name = "sltu"; + break; + case 0b100: + name = "xor"; + break; + case 0b101: + name = is_base ? 
"srl" : "sra"; + break; + case 0b110: + name = "or"; + break; + case 0b111: + name = "and"; + break; + default: + vassert(0); + } + DIP("%s %s, %s, %s\n", name, nameIReg(rd), nameIReg(rs1), + nameIReg(rs2)); + return True; + } + } + + /* ------------------------ fence ------------------------ */ + if (INSN(19, 0) == 0b00000000000000001111 && INSN(31, 28) == 0b0000) { + UInt succ = INSN(23, 20); + UInt pred = INSN(27, 24); + stmt(irsb, IRStmt_MBE(Imbe_Fence)); + if (pred == 0b1111 && succ == 0b1111) + DIP("fence\n"); + else + DIP("fence %s%s%s%s,%s%s%s%s\n", (pred & 0x8) ? "i" : "", + (pred & 0x4) ? "o" : "", (pred & 0x2) ? "r" : "", + (pred & 0x1) ? "w" : "", (succ & 0x8) ? "i" : "", + (succ & 0x4) ? "o" : "", (succ & 0x2) ? "r" : "", + (succ & 0x1) ? "w" : ""); + return True; + } + + /* ------------------------ ecall ------------------------ */ + if (INSN(31, 0) == 0b00000000000000000000000001110011) { + putPC(irsb, mkU64(guest_pc_curr_instr + 4)); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_Sys_syscall; + DIP("ecall\n"); + return True; + } + + /* ------------------------ ebreak ------------------------ */ + if (INSN(31, 0) == 0b00000000000100000000000001110011) { + putPC(irsb, mkU64(guest_pc_curr_instr + 4)); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_SigTRAP; + DIP("ebreak\n"); + return True; + } + + /* -------------- addiw rd, rs1, imm[11:0] --------------- */ + if (INSN(6, 0) == 0b0011011 && INSN(14, 12) == 0b000) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt imm11_0 = INSN(31, 20); + UInt simm = (UInt)vex_sx_to_64(imm11_0, 12); + if (rd != 0) + putIReg32(irsb, rd, binop(Iop_Add32, getIReg32(rs1), mkU32(simm))); + DIP("addiw %s, %s, %d\n", nameIReg(rd), nameIReg(rs1), (Int)simm); + return True; + } + + /* -------------- slliw rd, rs1, uimm[4:0] --------------- */ + if (INSN(6, 0) == 0b0011011 && INSN(14, 12) == 0b001 && + INSN(31, 25) == 0b0000000) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt uimm4_0 = INSN(24, 20); + if (rd != 0) + putIReg32(irsb, rd, binop(Iop_Shl32, getIReg32(rs1), mkU8(uimm4_0))); + DIP("slliw %s, %s, %u\n", nameIReg(rd), nameIReg(rs1), uimm4_0); + return True; + } + + /* ---------- {srliw,sraiw} rd, rs1, uimm[4:0] ----------- */ + if (INSN(6, 0) == 0b0011011 && INSN(14, 12) == 0b101 && + INSN(29, 25) == 0b00000 && INSN(31, 31) == 0b0) { + Bool is_log = INSN(30, 30) == 0b0; + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt uimm4_0 = INSN(24, 20); + if (rd != 0) + putIReg32(irsb, rd, + binop(is_log ? Iop_Shr32 : Iop_Sar32, getIReg32(rs1), + mkU8(uimm4_0))); + DIP("%s %s, %s, %u\n", is_log ? "srliw" : "sraiw", nameIReg(rd), + nameIReg(rs1), uimm4_0); + return True; + } + + /* -------------- {addw,subw} rd, rs1, rs2 --------------- */ + if (INSN(6, 0) == 0b0111011 && INSN(14, 12) == 0b000 && + INSN(29, 25) == 0b00000 && INSN(31, 31) == 0b0) { + Bool is_add = INSN(30, 30) == 0b0; + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + if (rd != 0) + putIReg32(irsb, rd, + binop(is_add ? Iop_Add32 : Iop_Sub32, getIReg32(rs1), + getIReg32(rs2))); + DIP("%s %s, %s, %s\n", is_add ? 
"addw" : "subw", nameIReg(rd), + nameIReg(rs1), nameIReg(rs2)); + return True; + } + + /* ------------------ sllw rd, rs1, rs2 ------------------ */ + if (INSN(6, 0) == 0b0111011 && INSN(14, 12) == 0b001 && + INSN(31, 25) == 0b0000000) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + if (rd != 0) + putIReg32( + irsb, rd, + binop(Iop_Shl32, getIReg32(rs1), unop(Iop_64to8, getIReg64(rs2)))); + DIP("sllw %s, %s, %s\n", nameIReg(rd), nameIReg(rs1), nameIReg(rs2)); + return True; + } + + /* -------------- {srlw,sraw} rd, rs1, rs2 --------------- */ + if (INSN(6, 0) == 0b0111011 && INSN(14, 12) == 0b101 && + INSN(29, 25) == 0b00000 && INSN(31, 31) == 0b0) { + Bool is_log = INSN(30, 30) == 0b0; + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + if (rd != 0) + putIReg32(irsb, rd, + binop(is_log ? Iop_Shr32 : Iop_Sar32, getIReg32(rs1), + unop(Iop_64to8, getIReg64(rs2)))); + DIP("%s %s, %s, %s\n", is_log ? "srlw" : "sraw", nameIReg(rd), + nameIReg(rs1), nameIReg(rs2)); + return True; + } + + return False; +} + +static Bool dis_RV64M(/*MB_OUT*/ DisResult* dres, + /*OUT*/ IRSB* irsb, + UInt insn) +{ + /* -------------- RV64M standard extension --------------- */ + + /* -------- {mul,mulh,mulhsu,mulhu} rd, rs1, rs2 --------- */ + /* --------------- {div,divu} rd, rs1, rs2 --------------- */ + /* --------------- {rem,remu} rd, rs1, rs2 --------------- */ + if (INSN(6, 0) == 0b0110011 && INSN(31, 25) == 0b0000001) { + UInt rd = INSN(11, 7); + UInt funct3 = INSN(14, 12); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + if (funct3 == 0b010) { + /* Invalid {MUL,DIV,REM}, fall through. */ + } else if (funct3 == 0b010) { + /* MULHSU, not currently handled, fall through. */ + } else { + if (rd != 0) { + IRExpr* expr; + switch (funct3) { + case 0b000: + expr = binop(Iop_Mul64, getIReg64(rs1), getIReg64(rs2)); + break; + case 0b001: + expr = unop(Iop_128HIto64, + binop(Iop_MullS64, getIReg64(rs1), getIReg64(rs2))); + break; + case 0b011: + expr = unop(Iop_128HIto64, + binop(Iop_MullU64, getIReg64(rs1), getIReg64(rs2))); + break; + case 0b100: + expr = binop(Iop_DivS64, getIReg64(rs1), getIReg64(rs2)); + break; + case 0b101: + expr = binop(Iop_DivU64, getIReg64(rs1), getIReg64(rs2)); + break; + case 0b110: + expr = + unop(Iop_128HIto64, binop(Iop_DivModS64to64, getIReg64(rs1), + getIReg64(rs2))); + break; + case 0b111: + expr = + unop(Iop_128HIto64, binop(Iop_DivModU64to64, getIReg64(rs1), + getIReg64(rs2))); + break; + default: + vassert(0); + } + putIReg64(irsb, rd, expr); + } + const HChar* name; + switch (funct3) { + case 0b000: + name = "mul"; + break; + case 0b001: + name = "mulh"; + break; + case 0b011: + name = "mulhu"; + break; + case 0b100: + name = "div"; + break; + case 0b101: + name = "divu"; + break; + case 0b110: + name = "rem"; + break; + case 0b111: + name = "remu"; + break; + default: + vassert(0); + } + DIP("%s %s, %s, %s\n", name, nameIReg(rd), nameIReg(rs1), + nameIReg(rs2)); + return True; + } + } + + /* ------------------ mulw rd, rs1, rs2 ------------------ */ + /* -------------- {divw,divuw} rd, rs1, rs2 -------------- */ + /* -------------- {remw,remuw} rd, rs1, rs2 -------------- */ + if (INSN(6, 0) == 0b0111011 && INSN(31, 25) == 0b0000001) { + UInt rd = INSN(11, 7); + UInt funct3 = INSN(14, 12); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + if (funct3 == 0b001 || funct3 == 0b010 || funct3 == 0b011) { + /* Invalid {MUL,DIV,REM}W, fall through. 
*/ + } else { + if (rd != 0) { + IRExpr* expr; + switch (funct3) { + case 0b000: + expr = binop(Iop_Mul32, getIReg32(rs1), getIReg32(rs2)); + break; + case 0b100: + expr = binop(Iop_DivS32, getIReg32(rs1), getIReg32(rs2)); + break; + case 0b101: + expr = binop(Iop_DivU32, getIReg32(rs1), getIReg32(rs2)); + break; + case 0b110: + expr = unop(Iop_64HIto32, binop(Iop_DivModS32to32, + getIReg32(rs1), getIReg32(rs2))); + break; + case 0b111: + expr = unop(Iop_64HIto32, binop(Iop_DivModU32to32, + getIReg32(rs1), getIReg32(rs2))); + break; + default: + vassert(0); + } + putIReg32(irsb, rd, expr); + } + const HChar* name; + switch (funct3) { + case 0b000: + name = "mulw"; + break; + case 0b100: + name = "divw"; + break; + case 0b101: + name = "divuw"; + break; + case 0b110: + name = "remw"; + break; + case 0b111: + name = "remuw"; + break; + default: + vassert(0); + } + DIP("%s %s, %s, %s\n", name, nameIReg(rd), nameIReg(rs1), + nameIReg(rs2)); + return True; + } + } + + return False; +} + +static Bool dis_RV64A(/*MB_OUT*/ DisResult* dres, + /*OUT*/ IRSB* irsb, + UInt insn, + Addr guest_pc_curr_instr, + const VexAbiInfo* abiinfo) +{ + /* -------------- RV64A standard extension --------------- */ + + /* ----------------- lr.{w,d} rd, (rs1) ------------------ */ + if (INSN(6, 0) == 0b0101111 && INSN(14, 13) == 0b01 && + INSN(24, 20) == 0b00000 && INSN(31, 27) == 0b00010) { + UInt rd = INSN(11, 7); + Bool is_32 = INSN(12, 12) == 0b0; + UInt rs1 = INSN(19, 15); + UInt aqrl = INSN(26, 25); + + if (aqrl & 0x1) + stmt(irsb, IRStmt_MBE(Imbe_Fence)); + + IRType ty = is_32 ? Ity_I32 : Ity_I64; + if (abiinfo->guest__use_fallback_LLSC) { + /* Get address of the load. */ + IRTemp ea = newTemp(irsb, Ity_I64); + assign(irsb, ea, getIReg64(rs1)); + + /* Load the value. */ + IRTemp res = newTemp(irsb, Ity_I64); + assign(irsb, res, widenSto64(ty, loadLE(ty, mkexpr(ea)))); + + /* Set up the LLSC fallback data. */ + stmt(irsb, IRStmt_Put(OFFB_LLSC_DATA, mkexpr(res))); + stmt(irsb, IRStmt_Put(OFFB_LLSC_ADDR, mkexpr(ea))); + stmt(irsb, IRStmt_Put(OFFB_LLSC_SIZE, mkU64(4))); + + /* Write the result to the destination register. */ + if (rd != 0) + putIReg64(irsb, rd, mkexpr(res)); + } else { + /* TODO Rework the non-fallback mode by recognizing common LR+SC + sequences and simulating them as one. */ + IRTemp res = newTemp(irsb, ty); + stmt(irsb, IRStmt_LLSC(Iend_LE, res, getIReg64(rs1), NULL /*LL*/)); + if (rd != 0) + putIReg64(irsb, rd, widenSto64(ty, mkexpr(res))); + } + + if (aqrl & 0x2) + stmt(irsb, IRStmt_MBE(Imbe_Fence)); + + DIP("lr.%s%s %s, (%s)%s\n", is_32 ? "w" : "d", nameAqRlSuffix(aqrl), + nameIReg(rd), nameIReg(rs1), + abiinfo->guest__use_fallback_LLSC ? " (fallback implementation)" + : ""); + return True; + } + + /* --------------- sc.{w,d} rd, rs2, (rs1) --------------- */ + if (INSN(6, 0) == 0b0101111 && INSN(14, 13) == 0b01 && + INSN(31, 27) == 0b00011) { + UInt rd = INSN(11, 7); + Bool is_32 = INSN(12, 12) == 0b0; + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + UInt aqrl = INSN(26, 25); + + if (aqrl & 0x1) + stmt(irsb, IRStmt_MBE(Imbe_Fence)); + + IRType ty = is_32 ? Ity_I32 : Ity_I64; + if (abiinfo->guest__use_fallback_LLSC) { + /* Get address of the load. */ + IRTemp ea = newTemp(irsb, Ity_I64); + assign(irsb, ea, getIReg64(rs1)); + + /* Get the continuation address. */ + IRConst* nia = IRConst_U64(guest_pc_curr_instr + 4); + + /* Mark the SC initially as failed. */ + if (rd != 0) + putIReg64(irsb, rd, mkU64(1)); + + /* Set that no transaction is in progress. 
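+            Clearing the recorded LLSC size first guarantees that every
+            failure exit below leaves the guest with no pending reservation;
+            the store itself is attempted only by the CAS further down, after
+            the size, address and data recorded by the matching LR have all
+            been checked.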
*/ + IRTemp size = newTemp(irsb, Ity_I64); + assign(irsb, size, IRExpr_Get(OFFB_LLSC_SIZE, Ity_I64)); + stmt(irsb, + IRStmt_Put(OFFB_LLSC_SIZE, mkU64(0) /* "no transaction" */)); + + /* Fail if no or wrong-size transaction. */ + stmt(irsb, IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(size), mkU64(4)), + Ijk_Boring, nia, OFFB_PC)); + + /* Fail if the address doesn't match the LL address. */ + stmt(irsb, IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(ea), + IRExpr_Get(OFFB_LLSC_ADDR, Ity_I64)), + Ijk_Boring, nia, OFFB_PC)); + + /* Fail if the data doesn't match the LL data. */ + IRTemp data = newTemp(irsb, Ity_I64); + assign(irsb, data, IRExpr_Get(OFFB_LLSC_DATA, Ity_I64)); + stmt(irsb, IRStmt_Exit(binop(Iop_CmpNE64, + widenSto64(ty, loadLE(ty, mkexpr(ea))), + mkexpr(data)), + Ijk_Boring, nia, OFFB_PC)); + + /* Try to CAS the new value in. */ + IRTemp old = newTemp(irsb, ty); + IRTemp expd = newTemp(irsb, ty); + assign(irsb, expd, narrowFrom64(ty, mkexpr(data))); + stmt(irsb, IRStmt_CAS(mkIRCAS( + /*oldHi*/ IRTemp_INVALID, old, Iend_LE, mkexpr(ea), + /*expdHi*/ NULL, mkexpr(expd), + /*dataHi*/ NULL, narrowFrom64(ty, getIReg64(rs2))))); + + /* Fail if the CAS failed (old != expd). */ + stmt(irsb, IRStmt_Exit(binop(is_32 ? Iop_CmpNE32 : Iop_CmpNE64, + mkexpr(old), mkexpr(expd)), + Ijk_Boring, nia, OFFB_PC)); + + /* Otherwise mark the operation as successful. */ + if (rd != 0) + putIReg64(irsb, rd, mkU64(0)); + } else { + IRTemp res = newTemp(irsb, Ity_I1); + stmt(irsb, IRStmt_LLSC(Iend_LE, res, getIReg64(rs1), + narrowFrom64(ty, getIReg64(rs2)))); + /* IR semantics: res is 1 if store succeeds, 0 if it fails. Need to set + rd to 1 on failure, 0 on success. */ + if (rd != 0) + putIReg64( + irsb, rd, + binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)), mkU64(1))); + } + + if (aqrl & 0x2) + stmt(irsb, IRStmt_MBE(Imbe_Fence)); + + DIP("sc.%s%s %s, %s, (%s)%s\n", is_32 ? "w" : "d", nameAqRlSuffix(aqrl), + nameIReg(rd), nameIReg(rs2), nameIReg(rs1), + abiinfo->guest__use_fallback_LLSC ? " (fallback implementation)" + : ""); + return True; + } + + /* --------- amo{swap,add}.{w,d} rd, rs2, (rs1) ---------- */ + /* -------- amo{xor,and,or}.{w,d} rd, rs2, (rs1) --------- */ + /* ---------- amo{min,max}.{w,d} rd, rs2, (rs1) ---------- */ + /* --------- amo{minu,maxu}.{w,d} rd, rs2, (rs1) --------- */ + if (INSN(6, 0) == 0b0101111 && INSN(14, 13) == 0b01) { + UInt rd = INSN(11, 7); + Bool is_32 = INSN(12, 12) == 0b0; + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + UInt aqrl = INSN(26, 25); + UInt funct5 = INSN(31, 27); + if ((funct5 & 0b00010) || funct5 == 0b00101 || funct5 == 0b01001 || + funct5 == 0b01101 || funct5 == 0b10001 || funct5 == 0b10101 || + funct5 == 0b11001 || funct5 == 0b11101) { + /* Invalid AMO, fall through. */ + } else { + if (aqrl & 0x1) + stmt(irsb, IRStmt_MBE(Imbe_Fence)); + + IRTemp addr = newTemp(irsb, Ity_I64); + assign(irsb, addr, getIReg64(rs1)); + + IRType ty = is_32 ? Ity_I32 : Ity_I64; + IRTemp orig = newTemp(irsb, ty); + assign(irsb, orig, loadLE(ty, mkexpr(addr))); + IRExpr* lhs = mkexpr(orig); + IRExpr* rhs = narrowFrom64(ty, getIReg64(rs2)); + + /* Perform the operation. */ + const HChar* name; + IRExpr* res; + switch (funct5) { + case 0b00001: + name = "amoswap"; + res = rhs; + break; + case 0b00000: + name = "amoadd"; + res = binop(is_32 ? Iop_Add32 : Iop_Add64, lhs, rhs); + break; + case 0b00100: + name = "amoxor"; + res = binop(is_32 ? Iop_Xor32 : Iop_Xor64, lhs, rhs); + break; + case 0b01100: + name = "amoand"; + res = binop(is_32 ? 
Iop_And32 : Iop_And64, lhs, rhs); + break; + case 0b01000: + name = "amoor"; + res = binop(is_32 ? Iop_Or32 : Iop_Or64, lhs, rhs); + break; + case 0b10000: + name = "amomin"; + res = IRExpr_ITE( + binop(is_32 ? Iop_CmpLT32S : Iop_CmpLT64S, lhs, rhs), lhs, rhs); + break; + case 0b10100: + name = "amomax"; + res = IRExpr_ITE( + binop(is_32 ? Iop_CmpLT32S : Iop_CmpLT64S, lhs, rhs), rhs, lhs); + break; + case 0b11000: + name = "amominu"; + res = IRExpr_ITE( + binop(is_32 ? Iop_CmpLT32U : Iop_CmpLT64U, lhs, rhs), lhs, rhs); + break; + case 0b11100: + name = "amomaxu"; + res = IRExpr_ITE( + binop(is_32 ? Iop_CmpLT32U : Iop_CmpLT64U, lhs, rhs), rhs, lhs); + break; + default: + vassert(0); + } + + /* Store the result back if the original value remains unchanged in + memory. */ + IRTemp old = newTemp(irsb, ty); + stmt(irsb, IRStmt_CAS(mkIRCAS(/*oldHi*/ IRTemp_INVALID, old, Iend_LE, + mkexpr(addr), + /*expdHi*/ NULL, mkexpr(orig), + /*dataHi*/ NULL, res))); + + if (aqrl & 0x2) + stmt(irsb, IRStmt_MBE(Imbe_Fence)); + + /* Retry if the CAS failed (i.e. when old != orig). */ + stmt(irsb, IRStmt_Exit(binop(is_32 ? Iop_CasCmpNE32 : Iop_CasCmpNE64, + mkexpr(old), mkexpr(orig)), + Ijk_Boring, IRConst_U64(guest_pc_curr_instr), + OFFB_PC)); + /* Otherwise we succeeded. */ + if (rd != 0) + putIReg64(irsb, rd, widenSto64(ty, mkexpr(old))); + + DIP("%s.%s%s %s, %s, (%s)\n", name, is_32 ? "w" : "d", + nameAqRlSuffix(aqrl), nameIReg(rd), nameIReg(rs2), nameIReg(rs1)); + return True; + } + } + + return False; +} + +static Bool dis_RV64F(/*MB_OUT*/ DisResult* dres, + /*OUT*/ IRSB* irsb, + UInt insn) +{ + /* -------------- RV64F standard extension --------------- */ + + /* --------------- flw rd, imm[11:0](rs1) ---------------- */ + if (INSN(6, 0) == 0b0000111 && INSN(14, 12) == 0b010) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt imm11_0 = INSN(31, 20); + ULong simm = vex_sx_to_64(imm11_0, 12); + putFReg32(irsb, rd, + loadLE(Ity_F32, binop(Iop_Add64, getIReg64(rs1), mkU64(simm)))); + DIP("flw %s, %lld(%s)\n", nameFReg(rd), (Long)simm, nameIReg(rs1)); + return True; + } + + /* --------------- fsw rs2, imm[11:0](rs1) --------------- */ + if (INSN(6, 0) == 0b0100111 && INSN(14, 12) == 0b010) { + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + UInt imm11_0 = INSN(31, 25) << 5 | INSN(11, 7); + ULong simm = vex_sx_to_64(imm11_0, 12); + storeLE(irsb, binop(Iop_Add64, getIReg64(rs1), mkU64(simm)), + getFReg32(rs2)); + DIP("fsw %s, %lld(%s)\n", nameFReg(rs2), (Long)simm, nameIReg(rs1)); + return True; + } + + /* -------- f{madd,msub}.s rd, rs1, rs2, rs3, rm --------- */ + /* ------- f{nmsub,nmadd}.s rd, rs1, rs2, rs3, rm -------- */ + if (INSN(1, 0) == 0b11 && INSN(6, 4) == 0b100 && INSN(26, 25) == 0b00) { + UInt opcode = INSN(6, 0); + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + UInt rs3 = INSN(31, 27); + IRTemp rm_RISCV, rm_IR; + mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm); + const HChar* name; + IRTemp a1 = newTemp(irsb, Ity_F32); + IRTemp a2 = newTemp(irsb, Ity_F32); + IRTemp a3 = newTemp(irsb, Ity_F32); + switch (opcode) { + case 0b1000011: + name = "fmadd"; + assign(irsb, a1, getFReg32(rs1)); + assign(irsb, a2, getFReg32(rs2)); + assign(irsb, a3, getFReg32(rs3)); + break; + case 0b1000111: + name = "fmsub"; + assign(irsb, a1, getFReg32(rs1)); + assign(irsb, a2, getFReg32(rs2)); + assign(irsb, a3, unop(Iop_NegF32, getFReg32(rs3))); + break; + case 0b1001011: + name = "fnmsub"; + assign(irsb, a1, unop(Iop_NegF32, getFReg32(rs1))); + 
assign(irsb, a2, getFReg32(rs2)); + assign(irsb, a3, getFReg32(rs3)); + break; + case 0b1001111: + name = "fnmadd"; + assign(irsb, a1, unop(Iop_NegF32, getFReg32(rs1))); + assign(irsb, a2, getFReg32(rs2)); + assign(irsb, a3, unop(Iop_NegF32, getFReg32(rs3))); + break; + default: + vassert(0); + } + putFReg32( + irsb, rd, + qop(Iop_MAddF32, mkexpr(rm_IR), mkexpr(a1), mkexpr(a2), mkexpr(a3))); + accumulateFFLAGS( + irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/, + "riscv64g_calculate_fflags_fmadd_s", + riscv64g_calculate_fflags_fmadd_s, + mkIRExprVec_4(mkexpr(a1), mkexpr(a2), mkexpr(a3), + mkexpr(rm_RISCV)))); + DIP("%s.s %s, %s, %s, %s%s\n", name, nameFReg(rd), nameFReg(rs1), + nameFReg(rs2), nameFReg(rs3), nameRMOperand(rm)); + return True; + } + + /* ------------ f{add,sub}.s rd, rs1, rs2, rm ------------ */ + /* ------------ f{mul,div}.s rd, rs1, rs2, rm ------------ */ + if (INSN(6, 0) == 0b1010011 && INSN(26, 25) == 0b00 && + INSN(31, 29) == 0b000) { + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + UInt funct7 = INSN(31, 25); + IRTemp rm_RISCV, rm_IR; + mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm); + const HChar* name; + IROp op; + IRTemp a1 = newTemp(irsb, Ity_F32); + IRTemp a2 = newTemp(irsb, Ity_F32); + const HChar* helper_name; + void* helper_addr; + switch (funct7) { + case 0b0000000: + name = "fadd"; + op = Iop_AddF32; + assign(irsb, a1, getFReg32(rs1)); + assign(irsb, a2, getFReg32(rs2)); + helper_name = "riscv64g_calculate_fflags_fadd_s"; + helper_addr = riscv64g_calculate_fflags_fadd_s; + break; + case 0b0000100: + name = "fsub"; + op = Iop_AddF32; + assign(irsb, a1, getFReg32(rs1)); + assign(irsb, a2, unop(Iop_NegF32, getFReg32(rs2))); + helper_name = "riscv64g_calculate_fflags_fadd_s"; + helper_addr = riscv64g_calculate_fflags_fadd_s; + break; + case 0b0001000: + name = "fmul"; + op = Iop_MulF32; + assign(irsb, a1, getFReg32(rs1)); + assign(irsb, a2, getFReg32(rs2)); + helper_name = "riscv64g_calculate_fflags_fmul_s"; + helper_addr = riscv64g_calculate_fflags_fmul_s; + break; + case 0b0001100: + name = "fdiv"; + op = Iop_DivF32; + assign(irsb, a1, getFReg32(rs1)); + assign(irsb, a2, getFReg32(rs2)); + helper_name = "riscv64g_calculate_fflags_fdiv_s"; + helper_addr = riscv64g_calculate_fflags_fdiv_s; + break; + default: + vassert(0); + } + putFReg32(irsb, rd, triop(op, mkexpr(rm_IR), mkexpr(a1), mkexpr(a2))); + accumulateFFLAGS(irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/, helper_name, + helper_addr, + mkIRExprVec_3(mkexpr(a1), mkexpr(a2), + mkexpr(rm_RISCV)))); + DIP("%s.s %s, %s, %s%s\n", name, nameFReg(rd), nameFReg(rs1), + nameFReg(rs2), nameRMOperand(rm)); + return True; + } + + /* ----------------- fsqrt.s rd, rs1, rm ----------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(24, 20) == 0b00000 && + INSN(31, 25) == 0b0101100) { + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); + UInt rs1 = INSN(19, 15); + IRTemp rm_RISCV, rm_IR; + mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm); + IRTemp a1 = newTemp(irsb, Ity_F32); + assign(irsb, a1, getFReg32(rs1)); + putFReg32(irsb, rd, binop(Iop_SqrtF32, mkexpr(rm_IR), mkexpr(a1))); + accumulateFFLAGS( + irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/, + "riscv64g_calculate_fflags_fsqrt_s", + riscv64g_calculate_fflags_fsqrt_s, + mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV)))); + DIP("fsqrt.s %s, %s%s\n", nameFReg(rd), nameFReg(rs1), nameRMOperand(rm)); + return True; + } + + /* ---------------- fsgnj.s rd, rs1, rs2 ----------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(14, 
12) == 0b000 && + INSN(31, 25) == 0b0010000) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + if (rs1 == rs2) { + putFReg32(irsb, rd, getFReg32(rs1)); + DIP("fmv.s %s, %s\n", nameFReg(rd), nameIReg(rs1)); + } else { + putFReg32( + irsb, rd, + unop(Iop_ReinterpI32asF32, + binop( + Iop_Or32, + binop(Iop_And32, unop(Iop_ReinterpF32asI32, getFReg32(rs1)), + mkU32(0x7fffffff)), + binop(Iop_And32, unop(Iop_ReinterpF32asI32, getFReg32(rs2)), + mkU32(0x80000000))))); + DIP("fsgnj.s %s, %s, %s\n", nameFReg(rd), nameIReg(rs1), + nameIReg(rs2)); + } + return True; + } + + /* ---------------- fsgnjn.s rd, rs1, rs2 ---------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b001 && + INSN(31, 25) == 0b0010000) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + if (rs1 == rs2) { + putFReg32(irsb, rd, unop(Iop_NegF32, getFReg32(rs1))); + DIP("fneg.s %s, %s\n", nameFReg(rd), nameIReg(rs1)); + } else { + putFReg32(irsb, rd, + unop(Iop_ReinterpI32asF32, + binop(Iop_Or32, + binop(Iop_And32, + unop(Iop_ReinterpF32asI32, getFReg32(rs1)), + mkU32(0x7fffffff)), + binop(Iop_And32, + unop(Iop_ReinterpF32asI32, + unop(Iop_NegF32, getFReg32(rs2))), + mkU32(0x80000000))))); + DIP("fsgnjn.s %s, %s, %s\n", nameFReg(rd), nameIReg(rs1), + nameIReg(rs2)); + } + return True; + } + + /* ---------------- fsgnjx.s rd, rs1, rs2 ---------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b010 && + INSN(31, 25) == 0b0010000) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + if (rs1 == rs2) { + putFReg32(irsb, rd, unop(Iop_AbsF32, getFReg32(rs1))); + DIP("fabs.s %s, %s\n", nameFReg(rd), nameIReg(rs1)); + } else { + putFReg32( + irsb, rd, + unop(Iop_ReinterpI32asF32, + binop(Iop_Xor32, unop(Iop_ReinterpF32asI32, getFReg32(rs1)), + binop(Iop_And32, + unop(Iop_ReinterpF32asI32, getFReg32(rs2)), + mkU32(0x80000000))))); + DIP("fsgnjx.s %s, %s, %s\n", nameFReg(rd), nameIReg(rs1), + nameIReg(rs2)); + } + return True; + } + + /* -------------- f{min,max}.s rd, rs1, rs2 -------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(31, 25) == 0b0010100) { + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + if (rm != 0b000 && rm != 0b001) { + /* Invalid F{MIN,MAX}.S, fall through. 
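+         The rm field acts as funct3 for this opcode: 0b000 selects FMIN.S
+         and 0b001 selects FMAX.S; the remaining values are reserved.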
*/ + } else { + const HChar* name; + IROp op; + const HChar* helper_name; + void* helper_addr; + switch (rm) { + case 0b000: + name = "fmin"; + op = Iop_MinNumF32; + helper_name = "riscv64g_calculate_fflags_fmin_s"; + helper_addr = riscv64g_calculate_fflags_fmin_s; + break; + case 0b001: + name = "fmax"; + op = Iop_MaxNumF32; + helper_name = "riscv64g_calculate_fflags_fmax_s"; + helper_addr = riscv64g_calculate_fflags_fmax_s; + break; + default: + vassert(0); + } + IRTemp a1 = newTemp(irsb, Ity_F32); + IRTemp a2 = newTemp(irsb, Ity_F32); + assign(irsb, a1, getFReg32(rs1)); + assign(irsb, a2, getFReg32(rs2)); + putFReg32(irsb, rd, binop(op, mkexpr(a1), mkexpr(a2))); + accumulateFFLAGS(irsb, + mkIRExprCCall(Ity_I32, 0 /*regparms*/, helper_name, + helper_addr, + mkIRExprVec_2(mkexpr(a1), mkexpr(a2)))); + DIP("%s.s %s, %s, %s\n", name, nameFReg(rd), nameFReg(rs1), + nameFReg(rs2)); + return True; + } + } + + /* -------------- fcvt.{w,wu}.s rd, rs1, rm -------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(24, 21) == 0b0000 && + INSN(31, 25) == 0b1100000) { + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); + UInt rs1 = INSN(19, 15); + Bool is_signed = INSN(20, 20) == 0b0; + IRTemp rm_RISCV, rm_IR; + mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm); + IRTemp a1 = newTemp(irsb, Ity_F32); + assign(irsb, a1, getFReg32(rs1)); + if (rd != 0) + putIReg32(irsb, rd, + binop(is_signed ? Iop_F32toI32S : Iop_F32toI32U, + mkexpr(rm_IR), mkexpr(a1))); + accumulateFFLAGS( + irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/, + is_signed ? "riscv64g_calculate_fflags_fcvt_w_s" + : "riscv64g_calculate_fflags_fcvt_wu_s", + is_signed ? riscv64g_calculate_fflags_fcvt_w_s + : riscv64g_calculate_fflags_fcvt_wu_s, + mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV)))); + DIP("fcvt.w%s.s %s, %s%s\n", is_signed ? "" : "u", nameIReg(rd), + nameFReg(rs1), nameRMOperand(rm)); + return True; + } + + /* ------------------- fmv.x.w rd, rs1 ------------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b000 && + INSN(24, 20) == 0b00000 && INSN(31, 25) == 0b1110000) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + if (rd != 0) + putIReg32(irsb, rd, unop(Iop_ReinterpF32asI32, getFReg32(rs1))); + DIP("fmv.x.w %s, %s\n", nameIReg(rd), nameFReg(rs1)); + return True; + } + + /* ------------- f{eq,lt,le}.s rd, rs1, rs2 -------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(31, 25) == 0b1010000) { + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + if (rm != 0b010 && rm != 0b001 && rm != 0b000) { + /* Invalid F{EQ,LT,LE}.S, fall through. 
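+         The rm field again acts as funct3: 0b010 is FEQ.S, 0b001 is FLT.S
+         and 0b000 is FLE.S; the remaining values are reserved.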
*/ + } else { + IRTemp a1 = newTemp(irsb, Ity_F32); + IRTemp a2 = newTemp(irsb, Ity_F32); + assign(irsb, a1, getFReg32(rs1)); + assign(irsb, a2, getFReg32(rs2)); + if (rd != 0) { + IRTemp cmp = newTemp(irsb, Ity_I32); + assign(irsb, cmp, binop(Iop_CmpF32, mkexpr(a1), mkexpr(a2))); + IRTemp res = newTemp(irsb, Ity_I1); + switch (rm) { + case 0b010: + assign(irsb, res, + binop(Iop_CmpEQ32, mkexpr(cmp), mkU32(Ircr_EQ))); + break; + case 0b001: + assign(irsb, res, + binop(Iop_CmpEQ32, mkexpr(cmp), mkU32(Ircr_LT))); + break; + case 0b000: + assign(irsb, res, + binop(Iop_Or1, + binop(Iop_CmpEQ32, mkexpr(cmp), mkU32(Ircr_LT)), + binop(Iop_CmpEQ32, mkexpr(cmp), mkU32(Ircr_EQ)))); + break; + default: + vassert(0); + } + putIReg64(irsb, rd, unop(Iop_1Uto64, mkexpr(res))); + } + const HChar* name; + const HChar* helper_name; + void* helper_addr; + switch (rm) { + case 0b010: + name = "feq"; + helper_name = "riscv64g_calculate_fflags_feq_s"; + helper_addr = riscv64g_calculate_fflags_feq_s; + break; + case 0b001: + name = "flt"; + helper_name = "riscv64g_calculate_fflags_flt_s"; + helper_addr = riscv64g_calculate_fflags_flt_s; + break; + case 0b000: + name = "fle"; + helper_name = "riscv64g_calculate_fflags_fle_s"; + helper_addr = riscv64g_calculate_fflags_fle_s; + break; + default: + vassert(0); + } + accumulateFFLAGS(irsb, + mkIRExprCCall(Ity_I32, 0 /*regparms*/, helper_name, + helper_addr, + mkIRExprVec_2(mkexpr(a1), mkexpr(a2)))); + DIP("%s.s %s, %s, %s\n", name, nameIReg(rd), nameFReg(rs1), + nameFReg(rs2)); + return True; + } + } + + /* ------------------ fclass.s rd, rs1 ------------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b001 && + INSN(24, 20) == 0b00000 && INSN(31, 25) == 0b1110000) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + if (rd != 0) + putIReg64(irsb, rd, + mkIRExprCCall(Ity_I64, 0 /*regparms*/, + "riscv64g_calculate_fclass_s", + riscv64g_calculate_fclass_s, + mkIRExprVec_1(getFReg32(rs1)))); + DIP("fclass.s %s, %s\n", nameIReg(rd), nameFReg(rs1)); + return True; + } + + /* ------------------- fmv.w.x rd, rs1 ------------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b000 && + INSN(24, 20) == 0b00000 && INSN(31, 25) == 0b1111000) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + putFReg32(irsb, rd, unop(Iop_ReinterpI32asF32, getIReg32(rs1))); + DIP("fmv.w.x %s, %s\n", nameFReg(rd), nameIReg(rs1)); + return True; + } + + /* -------------- fcvt.s.{w,wu} rd, rs1, rm -------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(24, 21) == 0b0000 && + INSN(31, 25) == 0b1101000) { + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); + UInt rs1 = INSN(19, 15); + Bool is_signed = INSN(20, 20) == 0b0; + IRTemp rm_RISCV, rm_IR; + mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm); + IRTemp a1 = newTemp(irsb, Ity_I32); + assign(irsb, a1, getIReg32(rs1)); + putFReg32(irsb, rd, + binop(is_signed ? Iop_I32StoF32 : Iop_I32UtoF32, mkexpr(rm_IR), + mkexpr(a1))); + accumulateFFLAGS( + irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/, + is_signed ? "riscv64g_calculate_fflags_fcvt_s_w" + : "riscv64g_calculate_fflags_fcvt_s_wu", + is_signed ? riscv64g_calculate_fflags_fcvt_s_w + : riscv64g_calculate_fflags_fcvt_s_wu, + mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV)))); + DIP("fcvt.s.w%s %s, %s%s\n", is_signed ? 
"" : "u", nameFReg(rd), + nameIReg(rs1), nameRMOperand(rm)); + return True; + } + + /* -------------- fcvt.{l,lu}.s rd, rs1, rm -------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(24, 21) == 0b0001 && + INSN(31, 25) == 0b1100000) { + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); + UInt rs1 = INSN(19, 15); + Bool is_signed = INSN(20, 20) == 0b0; + IRTemp rm_RISCV, rm_IR; + mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm); + IRTemp a1 = newTemp(irsb, Ity_F32); + assign(irsb, a1, getFReg32(rs1)); + if (rd != 0) + putIReg64(irsb, rd, + binop(is_signed ? Iop_F32toI64S : Iop_F32toI64U, + mkexpr(rm_IR), mkexpr(a1))); + accumulateFFLAGS( + irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/, + is_signed ? "riscv64g_calculate_fflags_fcvt_l_s" + : "riscv64g_calculate_fflags_fcvt_lu_s", + is_signed ? riscv64g_calculate_fflags_fcvt_l_s + : riscv64g_calculate_fflags_fcvt_lu_s, + mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV)))); + DIP("fcvt.l%s.s %s, %s%s\n", is_signed ? "" : "u", nameIReg(rd), + nameFReg(rs1), nameRMOperand(rm)); + return True; + } + + /* -------------- fcvt.s.{l,lu} rd, rs1, rm -------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(24, 21) == 0b0001 && + INSN(31, 25) == 0b1101000) { + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); + UInt rs1 = INSN(19, 15); + Bool is_signed = INSN(20, 20) == 0b0; + IRTemp rm_RISCV, rm_IR; + mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm); + IRTemp a1 = newTemp(irsb, Ity_I64); + assign(irsb, a1, getIReg64(rs1)); + putFReg32(irsb, rd, + binop(is_signed ? Iop_I64StoF32 : Iop_I64UtoF32, mkexpr(rm_IR), + mkexpr(a1))); + accumulateFFLAGS( + irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/, + is_signed ? "riscv64g_calculate_fflags_fcvt_s_l" + : "riscv64g_calculate_fflags_fcvt_s_lu", + is_signed ? riscv64g_calculate_fflags_fcvt_s_l + : riscv64g_calculate_fflags_fcvt_s_lu, + mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV)))); + DIP("fcvt.s.l%s %s, %s%s\n", is_signed ? 
"" : "u", nameFReg(rd), + nameIReg(rs1), nameRMOperand(rm)); + return True; + } + + return False; +} + +static Bool dis_RV64D(/*MB_OUT*/ DisResult* dres, + /*OUT*/ IRSB* irsb, + UInt insn) +{ + /* -------------- RV64D standard extension --------------- */ + + /* --------------- fld rd, imm[11:0](rs1) ---------------- */ + if (INSN(6, 0) == 0b0000111 && INSN(14, 12) == 0b011) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt imm11_0 = INSN(31, 20); + ULong simm = vex_sx_to_64(imm11_0, 12); + putFReg64(irsb, rd, + loadLE(Ity_F64, binop(Iop_Add64, getIReg64(rs1), mkU64(simm)))); + DIP("fld %s, %lld(%s)\n", nameFReg(rd), (Long)simm, nameIReg(rs1)); + return True; + } + + /* --------------- fsd rs2, imm[11:0](rs1) --------------- */ + if (INSN(6, 0) == 0b0100111 && INSN(14, 12) == 0b011) { + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + UInt imm11_0 = INSN(31, 25) << 5 | INSN(11, 7); + ULong simm = vex_sx_to_64(imm11_0, 12); + storeLE(irsb, binop(Iop_Add64, getIReg64(rs1), mkU64(simm)), + getFReg64(rs2)); + DIP("fsd %s, %lld(%s)\n", nameFReg(rs2), (Long)simm, nameIReg(rs1)); + return True; + } + + /* -------- f{madd,msub}.d rd, rs1, rs2, rs3, rm --------- */ + /* ------- f{nmsub,nmadd}.d rd, rs1, rs2, rs3, rm -------- */ + if (INSN(1, 0) == 0b11 && INSN(6, 4) == 0b100 && INSN(26, 25) == 0b01) { + UInt opcode = INSN(6, 0); + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + UInt rs3 = INSN(31, 27); + IRTemp rm_RISCV, rm_IR; + mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm); + const HChar* name; + IRTemp a1 = newTemp(irsb, Ity_F64); + IRTemp a2 = newTemp(irsb, Ity_F64); + IRTemp a3 = newTemp(irsb, Ity_F64); + switch (opcode) { + case 0b1000011: + name = "fmadd"; + assign(irsb, a1, getFReg64(rs1)); + assign(irsb, a2, getFReg64(rs2)); + assign(irsb, a3, getFReg64(rs3)); + break; + case 0b1000111: + name = "fmsub"; + assign(irsb, a1, getFReg64(rs1)); + assign(irsb, a2, getFReg64(rs2)); + assign(irsb, a3, unop(Iop_NegF64, getFReg64(rs3))); + break; + case 0b1001011: + name = "fnmsub"; + assign(irsb, a1, unop(Iop_NegF64, getFReg64(rs1))); + assign(irsb, a2, getFReg64(rs2)); + assign(irsb, a3, getFReg64(rs3)); + break; + case 0b1001111: + name = "fnmadd"; + assign(irsb, a1, unop(Iop_NegF64, getFReg64(rs1))); + assign(irsb, a2, getFReg64(rs2)); + assign(irsb, a3, unop(Iop_NegF64, getFReg64(rs3))); + break; + default: + vassert(0); + } + putFReg64( + irsb, rd, + qop(Iop_MAddF64, mkexpr(rm_IR), mkexpr(a1), mkexpr(a2), mkexpr(a3))); + accumulateFFLAGS( + irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/, + "riscv64g_calculate_fflags_fmadd_d", + riscv64g_calculate_fflags_fmadd_d, + mkIRExprVec_4(mkexpr(a1), mkexpr(a2), mkexpr(a3), + mkexpr(rm_RISCV)))); + DIP("%s.d %s, %s, %s, %s%s\n", name, nameFReg(rd), nameFReg(rs1), + nameFReg(rs2), nameFReg(rs3), nameRMOperand(rm)); + return True; + } + + /* ------------ f{add,sub}.d rd, rs1, rs2, rm ------------ */ + /* ------------ f{mul,div}.d rd, rs1, rs2, rm ------------ */ + if (INSN(6, 0) == 0b1010011 && INSN(26, 25) == 0b01 && + INSN(31, 29) == 0b000) { + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + UInt funct7 = INSN(31, 25); + IRTemp rm_RISCV, rm_IR; + mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm); + const HChar* name; + IROp op; + IRTemp a1 = newTemp(irsb, Ity_F64); + IRTemp a2 = newTemp(irsb, Ity_F64); + const HChar* helper_name; + void* helper_addr; + switch (funct7) { + case 0b0000001: + name = "fadd"; + op = Iop_AddF64; + 
assign(irsb, a1, getFReg64(rs1)); + assign(irsb, a2, getFReg64(rs2)); + helper_name = "riscv64g_calculate_fflags_fadd_d"; + helper_addr = riscv64g_calculate_fflags_fadd_d; + break; + case 0b0000101: + name = "fsub"; + op = Iop_AddF64; + assign(irsb, a1, getFReg64(rs1)); + assign(irsb, a2, unop(Iop_NegF64, getFReg64(rs2))); + helper_name = "riscv64g_calculate_fflags_fadd_d"; + helper_addr = riscv64g_calculate_fflags_fadd_d; + break; + case 0b0001001: + name = "fmul"; + op = Iop_MulF64; + assign(irsb, a1, getFReg64(rs1)); + assign(irsb, a2, getFReg64(rs2)); + helper_name = "riscv64g_calculate_fflags_fmul_d"; + helper_addr = riscv64g_calculate_fflags_fmul_d; + break; + case 0b0001101: + name = "fdiv"; + op = Iop_DivF64; + assign(irsb, a1, getFReg64(rs1)); + assign(irsb, a2, getFReg64(rs2)); + helper_name = "riscv64g_calculate_fflags_fdiv_d"; + helper_addr = riscv64g_calculate_fflags_fdiv_d; + break; + default: + vassert(0); + } + putFReg64(irsb, rd, triop(op, mkexpr(rm_IR), mkexpr(a1), mkexpr(a2))); + accumulateFFLAGS(irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/, helper_name, + helper_addr, + mkIRExprVec_3(mkexpr(a1), mkexpr(a2), + mkexpr(rm_RISCV)))); + DIP("%s.d %s, %s, %s%s\n", name, nameFReg(rd), nameFReg(rs1), + nameFReg(rs2), nameRMOperand(rm)); + return True; + } + + /* ----------------- fsqrt.d rd, rs1, rm ----------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(24, 20) == 0b00000 && + INSN(31, 25) == 0b0101101) { + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); + UInt rs1 = INSN(19, 15); + IRTemp rm_RISCV, rm_IR; + mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm); + IRTemp a1 = newTemp(irsb, Ity_F64); + assign(irsb, a1, getFReg64(rs1)); + putFReg64(irsb, rd, binop(Iop_SqrtF64, mkexpr(rm_IR), mkexpr(a1))); + accumulateFFLAGS( + irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/, + "riscv64g_calculate_fflags_fsqrt_d", + riscv64g_calculate_fflags_fsqrt_d, + mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV)))); + DIP("fsqrt.d %s, %s%s\n", nameFReg(rd), nameFReg(rs1), nameRMOperand(rm)); + return True; + } + + /* ---------------- fsgnj.d rd, rs1, rs2 ----------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b000 && + INSN(31, 25) == 0b0010001) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + if (rs1 == rs2) { + putFReg64(irsb, rd, getFReg64(rs1)); + DIP("fmv.d %s, %s\n", nameFReg(rd), nameIReg(rs1)); + } else { + putFReg64( + irsb, rd, + unop(Iop_ReinterpI64asF64, + binop( + Iop_Or64, + binop(Iop_And64, unop(Iop_ReinterpF64asI64, getFReg64(rs1)), + mkU64(0x7fffffffffffffff)), + binop(Iop_And64, unop(Iop_ReinterpF64asI64, getFReg64(rs2)), + mkU64(0x8000000000000000))))); + DIP("fsgnj.d %s, %s, %s\n", nameFReg(rd), nameIReg(rs1), + nameIReg(rs2)); + } + return True; + } + + /* ---------------- fsgnjn.d rd, rs1, rs2 ---------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b001 && + INSN(31, 25) == 0b0010001) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + if (rs1 == rs2) { + putFReg64(irsb, rd, unop(Iop_NegF64, getFReg64(rs1))); + DIP("fneg.d %s, %s\n", nameFReg(rd), nameIReg(rs1)); + } else { + putFReg64(irsb, rd, + unop(Iop_ReinterpI64asF64, + binop(Iop_Or64, + binop(Iop_And64, + unop(Iop_ReinterpF64asI64, getFReg64(rs1)), + mkU64(0x7fffffffffffffff)), + binop(Iop_And64, + unop(Iop_ReinterpF64asI64, + unop(Iop_NegF64, getFReg64(rs2))), + mkU64(0x8000000000000000))))); + DIP("fsgnjn.d %s, %s, %s\n", nameFReg(rd), nameIReg(rs1), + nameIReg(rs2)); + } + return True; + } + + /* ---------------- fsgnjx.d rd, 
rs1, rs2 ---------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b010 && + INSN(31, 25) == 0b0010001) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + if (rs1 == rs2) { + putFReg64(irsb, rd, unop(Iop_AbsF64, getFReg64(rs1))); + DIP("fabs.d %s, %s\n", nameFReg(rd), nameIReg(rs1)); + } else { + putFReg64( + irsb, rd, + unop(Iop_ReinterpI64asF64, + binop(Iop_Xor64, unop(Iop_ReinterpF64asI64, getFReg64(rs1)), + binop(Iop_And64, + unop(Iop_ReinterpF64asI64, getFReg64(rs2)), + mkU64(0x8000000000000000))))); + DIP("fsgnjx.d %s, %s, %s\n", nameFReg(rd), nameIReg(rs1), + nameIReg(rs2)); + } + return True; + } + + /* -------------- f{min,max}.d rd, rs1, rs2 -------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(31, 25) == 0b0010101) { + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + if (rm != 0b000 && rm != 0b001) { + /* Invalid F{MIN,MAX}.D, fall through. */ + } else { + const HChar* name; + IROp op; + const HChar* helper_name; + void* helper_addr; + switch (rm) { + case 0b000: + name = "fmin"; + op = Iop_MinNumF64; + helper_name = "riscv64g_calculate_fflags_fmin_d"; + helper_addr = riscv64g_calculate_fflags_fmin_d; + break; + case 0b001: + name = "fmax"; + op = Iop_MaxNumF64; + helper_name = "riscv64g_calculate_fflags_fmax_d"; + helper_addr = riscv64g_calculate_fflags_fmax_d; + break; + default: + vassert(0); + } + IRTemp a1 = newTemp(irsb, Ity_F64); + IRTemp a2 = newTemp(irsb, Ity_F64); + assign(irsb, a1, getFReg64(rs1)); + assign(irsb, a2, getFReg64(rs2)); + putFReg64(irsb, rd, binop(op, mkexpr(a1), mkexpr(a2))); + accumulateFFLAGS(irsb, + mkIRExprCCall(Ity_I32, 0 /*regparms*/, helper_name, + helper_addr, + mkIRExprVec_2(mkexpr(a1), mkexpr(a2)))); + DIP("%s.d %s, %s, %s\n", name, nameFReg(rd), nameFReg(rs1), + nameFReg(rs2)); + return True; + } + } + + /* ---------------- fcvt.s.d rd, rs1, rm ----------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(24, 20) == 0b00001 && + INSN(31, 25) == 0b0100000) { + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); + UInt rs1 = INSN(19, 15); + IRTemp rm_RISCV, rm_IR; + mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm); + IRTemp a1 = newTemp(irsb, Ity_F64); + assign(irsb, a1, getFReg64(rs1)); + putFReg32(irsb, rd, binop(Iop_F64toF32, mkexpr(rm_IR), mkexpr(a1))); + accumulateFFLAGS( + irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/, + "riscv64g_calculate_fflags_fcvt_s_d", + riscv64g_calculate_fflags_fcvt_s_d, + mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV)))); + DIP("fcvt.s.d %s, %s%s\n", nameFReg(rd), nameFReg(rs1), + nameRMOperand(rm)); + return True; + } + + /* ---------------- fcvt.d.s rd, rs1, rm ----------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(24, 20) == 0b00000 && + INSN(31, 25) == 0b0100001) { + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); /* Ignored as the result is always exact. */ + UInt rs1 = INSN(19, 15); + putFReg64(irsb, rd, unop(Iop_F32toF64, getFReg32(rs1))); + DIP("fcvt.d.s %s, %s%s\n", nameFReg(rd), nameFReg(rs1), + nameRMOperand(rm)); + return True; + } + + /* ------------- f{eq,lt,le}.d rd, rs1, rs2 -------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(31, 25) == 0b1010001) { + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); + UInt rs1 = INSN(19, 15); + UInt rs2 = INSN(24, 20); + if (rm != 0b010 && rm != 0b001 && rm != 0b000) { + /* Invalid F{EQ,LT,LE}.D, fall through. 
*/ + } else { + IRTemp a1 = newTemp(irsb, Ity_F64); + IRTemp a2 = newTemp(irsb, Ity_F64); + assign(irsb, a1, getFReg64(rs1)); + assign(irsb, a2, getFReg64(rs2)); + if (rd != 0) { + IRTemp cmp = newTemp(irsb, Ity_I32); + assign(irsb, cmp, binop(Iop_CmpF64, mkexpr(a1), mkexpr(a2))); + IRTemp res = newTemp(irsb, Ity_I1); + switch (rm) { + case 0b010: + assign(irsb, res, + binop(Iop_CmpEQ32, mkexpr(cmp), mkU32(Ircr_EQ))); + break; + case 0b001: + assign(irsb, res, + binop(Iop_CmpEQ32, mkexpr(cmp), mkU32(Ircr_LT))); + break; + case 0b000: + assign(irsb, res, + binop(Iop_Or1, + binop(Iop_CmpEQ32, mkexpr(cmp), mkU32(Ircr_LT)), + binop(Iop_CmpEQ32, mkexpr(cmp), mkU32(Ircr_EQ)))); + break; + default: + vassert(0); + } + putIReg64(irsb, rd, unop(Iop_1Uto64, mkexpr(res))); + } + const HChar* name; + const HChar* helper_name; + void* helper_addr; + switch (rm) { + case 0b010: + name = "feq"; + helper_name = "riscv64g_calculate_fflags_feq_d"; + helper_addr = riscv64g_calculate_fflags_feq_d; + break; + case 0b001: + name = "flt"; + helper_name = "riscv64g_calculate_fflags_flt_d"; + helper_addr = riscv64g_calculate_fflags_flt_d; + break; + case 0b000: + name = "fle"; + helper_name = "riscv64g_calculate_fflags_fle_d"; + helper_addr = riscv64g_calculate_fflags_fle_d; + break; + default: + vassert(0); + } + accumulateFFLAGS(irsb, + mkIRExprCCall(Ity_I32, 0 /*regparms*/, helper_name, + helper_addr, + mkIRExprVec_2(mkexpr(a1), mkexpr(a2)))); + DIP("%s.d %s, %s, %s\n", name, nameIReg(rd), nameFReg(rs1), + nameFReg(rs2)); + return True; + } + } + + /* ------------------ fclass.d rd, rs1 ------------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b001 && + INSN(24, 20) == 0b00000 && INSN(31, 25) == 0b1110001) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + if (rd != 0) + putIReg64(irsb, rd, + mkIRExprCCall(Ity_I64, 0 /*regparms*/, + "riscv64g_calculate_fclass_d", + riscv64g_calculate_fclass_d, + mkIRExprVec_1(getFReg64(rs1)))); + DIP("fclass.d %s, %s\n", nameIReg(rd), nameFReg(rs1)); + return True; + } + + /* -------------- fcvt.{w,wu}.d rd, rs1, rm -------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(24, 21) == 0b0000 && + INSN(31, 25) == 0b1100001) { + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); + UInt rs1 = INSN(19, 15); + Bool is_signed = INSN(20, 20) == 0b0; + IRTemp rm_RISCV, rm_IR; + mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm); + IRTemp a1 = newTemp(irsb, Ity_F64); + assign(irsb, a1, getFReg64(rs1)); + if (rd != 0) + putIReg32(irsb, rd, + binop(is_signed ? Iop_F64toI32S : Iop_F64toI32U, + mkexpr(rm_IR), mkexpr(a1))); + accumulateFFLAGS( + irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/, + is_signed ? "riscv64g_calculate_fflags_fcvt_w_d" + : "riscv64g_calculate_fflags_fcvt_wu_d", + is_signed ? riscv64g_calculate_fflags_fcvt_w_d + : riscv64g_calculate_fflags_fcvt_wu_d, + mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV)))); + DIP("fcvt.w%s.d %s, %s%s\n", is_signed ? "" : "u", nameIReg(rd), + nameFReg(rs1), nameRMOperand(rm)); + return True; + } + + /* -------------- fcvt.d.{w,wu} rd, rs1, rm -------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(24, 21) == 0b0000 && + INSN(31, 25) == 0b1101001) { + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); /* Ignored as the result is always exact. */ + UInt rs1 = INSN(19, 15); + Bool is_signed = INSN(20, 20) == 0b0; + putFReg64( + irsb, rd, + unop(is_signed ? Iop_I32StoF64 : Iop_I32UtoF64, getIReg32(rs1))); + DIP("fcvt.d.w%s %s, %s%s\n", is_signed ? 
"" : "u", nameFReg(rd), + nameIReg(rs1), nameRMOperand(rm)); + return True; + } + + /* -------------- fcvt.{l,lu}.d rd, rs1, rm -------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(24, 21) == 0b0001 && + INSN(31, 25) == 0b1100001) { + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); + UInt rs1 = INSN(19, 15); + Bool is_signed = INSN(20, 20) == 0b0; + IRTemp rm_RISCV, rm_IR; + mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm); + IRTemp a1 = newTemp(irsb, Ity_F64); + assign(irsb, a1, getFReg64(rs1)); + if (rd != 0) + putIReg64(irsb, rd, + binop(is_signed ? Iop_F64toI64S : Iop_F64toI64U, + mkexpr(rm_IR), mkexpr(a1))); + accumulateFFLAGS( + irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/, + is_signed ? "riscv64g_calculate_fflags_fcvt_l_d" + : "riscv64g_calculate_fflags_fcvt_lu_d", + is_signed ? riscv64g_calculate_fflags_fcvt_l_d + : riscv64g_calculate_fflags_fcvt_lu_d, + mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV)))); + DIP("fcvt.l%s.d %s, %s%s\n", is_signed ? "" : "u", nameIReg(rd), + nameFReg(rs1), nameRMOperand(rm)); + return True; + } + + /* ------------------- fmv.x.d rd, rs1 ------------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b000 && + INSN(24, 20) == 0b00000 && INSN(31, 25) == 0b1110001) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + if (rd != 0) + putIReg64(irsb, rd, unop(Iop_ReinterpF64asI64, getFReg64(rs1))); + DIP("fmv.x.d %s, %s\n", nameIReg(rd), nameFReg(rs1)); + return True; + } + + /* -------------- fcvt.d.{l,lu} rd, rs1, rm -------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(24, 21) == 0b0001 && + INSN(31, 25) == 0b1101001) { + UInt rd = INSN(11, 7); + UInt rm = INSN(14, 12); + UInt rs1 = INSN(19, 15); + Bool is_signed = INSN(20, 20) == 0b0; + IRTemp rm_RISCV, rm_IR; + mk_get_rounding_mode(irsb, &rm_RISCV, &rm_IR, rm); + IRTemp a1 = newTemp(irsb, Ity_I64); + assign(irsb, a1, getIReg64(rs1)); + putFReg64(irsb, rd, + binop(is_signed ? Iop_I64StoF64 : Iop_I64UtoF64, mkexpr(rm_IR), + mkexpr(a1))); + accumulateFFLAGS( + irsb, mkIRExprCCall(Ity_I32, 0 /*regparms*/, + is_signed ? "riscv64g_calculate_fflags_fcvt_d_l" + : "riscv64g_calculate_fflags_fcvt_d_lu", + is_signed ? riscv64g_calculate_fflags_fcvt_d_l + : riscv64g_calculate_fflags_fcvt_d_lu, + mkIRExprVec_2(mkexpr(a1), mkexpr(rm_RISCV)))); + DIP("fcvt.d.l%s %s, %s%s\n", is_signed ? "" : "u", nameFReg(rd), + nameIReg(rs1), nameRMOperand(rm)); + return True; + } + + /* ------------------- fmv.d.x rd, rs1 ------------------- */ + if (INSN(6, 0) == 0b1010011 && INSN(14, 12) == 0b000 && + INSN(24, 20) == 0b00000 && INSN(31, 25) == 0b1111001) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + putFReg64(irsb, rd, unop(Iop_ReinterpI64asF64, getIReg64(rs1))); + DIP("fmv.d.x %s, %s\n", nameFReg(rd), nameIReg(rs1)); + return True; + } + + return False; +} + +static Bool dis_RV64Zicsr(/*MB_OUT*/ DisResult* dres, + /*OUT*/ IRSB* irsb, + UInt insn) +{ + /* ------------ RV64Zicsr standard extension ------------- */ + + /* ----------------- csrrw rd, csr, rs1 ------------------ */ + if (INSN(6, 0) == 0b1110011 && INSN(14, 12) == 0b001) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt csr = INSN(31, 20); + if (csr != 0x001 && csr != 0x002 && csr != 0x003) { + /* Invalid CSRRW, fall through. 
*/ + } else { + switch (csr) { + case 0x001: { + /* fflags */ + IRTemp fcsr = newTemp(irsb, Ity_I32); + assign(irsb, fcsr, getFCSR()); + if (rd != 0) + putIReg64(irsb, rd, + unop(Iop_32Uto64, + binop(Iop_And32, mkexpr(fcsr), mkU32(0x1f)))); + putFCSR(irsb, + binop(Iop_Or32, + binop(Iop_And32, mkexpr(fcsr), mkU32(0xffffffe0)), + binop(Iop_And32, getIReg32(rs1), mkU32(0x1f)))); + break; + } + case 0x002: { + /* frm */ + IRTemp fcsr = newTemp(irsb, Ity_I32); + assign(irsb, fcsr, getFCSR()); + if (rd != 0) + putIReg64( + irsb, rd, + unop(Iop_32Uto64, + binop(Iop_And32, binop(Iop_Shr32, mkexpr(fcsr), mkU8(5)), + mkU32(0x7)))); + putFCSR(irsb, + binop(Iop_Or32, + binop(Iop_And32, mkexpr(fcsr), mkU32(0xffffff1f)), + binop(Iop_Shl32, + binop(Iop_And32, getIReg32(rs1), mkU32(0x7)), + mkU8(5)))); + break; + } + case 0x003: { + /* fcsr */ + IRTemp fcsr = newTemp(irsb, Ity_I32); + assign(irsb, fcsr, getFCSR()); + if (rd != 0) + putIReg64(irsb, rd, unop(Iop_32Uto64, mkexpr(fcsr))); + putFCSR(irsb, binop(Iop_And32, getIReg32(rs1), mkU32(0xff))); + break; + } + default: + vassert(0); + } + DIP("csrrs %s, %s, %s\n", nameIReg(rd), nameCSR(csr), nameIReg(rs1)); + return True; + } + } + + /* ----------------- csrrs rd, csr, rs1 ------------------ */ + if (INSN(6, 0) == 0b1110011 && INSN(14, 12) == 0b010) { + UInt rd = INSN(11, 7); + UInt rs1 = INSN(19, 15); + UInt csr = INSN(31, 20); + if (csr != 0x001 && csr != 0x002 && csr != 0x003) { + /* Invalid CSRRS, fall through. */ + } else { + switch (csr) { + case 0x001: { + /* fflags */ + IRTemp fcsr = newTemp(irsb, Ity_I32); + assign(irsb, fcsr, getFCSR()); + if (rd != 0) + putIReg64(irsb, rd, + unop(Iop_32Uto64, + binop(Iop_And32, mkexpr(fcsr), mkU32(0x1f)))); + putFCSR(irsb, binop(Iop_Or32, mkexpr(fcsr), + binop(Iop_And32, getIReg32(rs1), mkU32(0x1f)))); + break; + } + case 0x002: { + /* frm */ + IRTemp fcsr = newTemp(irsb, Ity_I32); + assign(irsb, fcsr, getFCSR()); + if (rd != 0) + putIReg64( + irsb, rd, + unop(Iop_32Uto64, + binop(Iop_And32, binop(Iop_Shr32, mkexpr(fcsr), mkU8(5)), + mkU32(0x7)))); + putFCSR(irsb, + binop(Iop_Or32, mkexpr(fcsr), + binop(Iop_Shl32, + binop(Iop_And32, getIReg32(rs1), mkU32(0x7)), + mkU8(5)))); + break; + } + case 0x003: { + /* fcsr */ + IRTemp fcsr = newTemp(irsb, Ity_I32); + assign(irsb, fcsr, getFCSR()); + if (rd != 0) + putIReg64(irsb, rd, unop(Iop_32Uto64, mkexpr(fcsr))); + putFCSR(irsb, binop(Iop_Or32, mkexpr(fcsr), + binop(Iop_And32, getIReg32(rs1), mkU32(0xff)))); + break; + } + default: + vassert(0); + } + DIP("csrrs %s, %s, %s\n", nameIReg(rd), nameCSR(csr), nameIReg(rs1)); + return True; + } + } + + return False; +} + +static Bool dis_RISCV64_standard(/*MB_OUT*/ DisResult* dres, + /*OUT*/ IRSB* irsb, + UInt insn, + Addr guest_pc_curr_instr, + const VexAbiInfo* abiinfo, + Bool sigill_diag) +{ + vassert(INSN(1, 0) == 0b11); + + Bool ok = False; + if (!ok) + ok = dis_RV64I(dres, irsb, insn, guest_pc_curr_instr); + if (!ok) + ok = dis_RV64M(dres, irsb, insn); + if (!ok) + ok = dis_RV64A(dres, irsb, insn, guest_pc_curr_instr, abiinfo); + if (!ok) + ok = dis_RV64F(dres, irsb, insn); + if (!ok) + ok = dis_RV64D(dres, irsb, insn); + if (!ok) + ok = dis_RV64Zicsr(dres, irsb, insn); + if (ok) + return True; + + if (sigill_diag) + vex_printf("RISCV64 front end: standard\n"); + return False; +} + +/* Disassemble a single riscv64 instruction into IR. Returns True iff the + instruction was decoded, in which case *dres will be set accordingly, or + False, in which case *dres should be ignored by the caller. 
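+   The instruction can use either the 16-bit compressed or the 32-bit standard
+   encoding; which decoder is invoked is determined from insn[1:0] (0b11
+   selects a standard 32-bit instruction).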
*/ +static Bool disInstr_RISCV64_WRK(/*MB_OUT*/ DisResult* dres, + /*OUT*/ IRSB* irsb, + const UChar* guest_instr, + Addr guest_pc_curr_instr, + const VexArchInfo* archinfo, + const VexAbiInfo* abiinfo, + Bool sigill_diag) +{ + /* Set result defaults. */ + dres->whatNext = Dis_Continue; + dres->len = 0; + dres->jk_StopHere = Ijk_INVALID; + dres->hint = Dis_HintNone; + + /* Read the instruction word. */ + UInt insn = getInsn(guest_instr); + + if (0) + vex_printf("insn: 0x%x\n", insn); + + DIP("\t(riscv64) 0x%llx: ", (ULong)guest_pc_curr_instr); + + vassert((guest_pc_curr_instr & 1) == 0); + + /* Spot "Special" instructions (see comment at top of file). */ + { + const UChar* code = guest_instr; + /* Spot the 16-byte preamble: + 00305013 srli zero, zero, 3 + 00d05013 srli zero, zero, 13 + 03305013 srli zero, zero, 51 + 03d05013 srli zero, zero, 61 + */ + UInt word1 = 0x00305013; + UInt word2 = 0x00d05013; + UInt word3 = 0x03305013; + UInt word4 = 0x03d05013; + if (getUIntLittleEndianly(code + 0) == word1 && + getUIntLittleEndianly(code + 4) == word2 && + getUIntLittleEndianly(code + 8) == word3 && + getUIntLittleEndianly(code + 12) == word4) { + /* Got a "Special" instruction preamble. Which one is it? */ + dres->len = 20; + UInt which = getUIntLittleEndianly(code + 16); + if (which == 0x00a56533 /* or a0, a0, a0 */) { + /* a3 = client_request ( a4 ) */ + DIP("a3 = client_request ( a4 )\n"); + putPC(irsb, mkU64(guest_pc_curr_instr + 20)); + dres->jk_StopHere = Ijk_ClientReq; + dres->whatNext = Dis_StopHere; + return True; + } else if (which == 0x00b5e5b3 /* or a1, a1, a1 */) { + /* a3 = guest_NRADDR */ + DIP("a3 = guest_NRADDR\n"); + putIReg64(irsb, 13 /*x13/a3*/, IRExpr_Get(OFFB_NRADDR, Ity_I64)); + return True; + } else if (which == 0x00c66633 /* or a2, a2, a2 */) { + /* branch-and-link-to-noredir t0 */ + DIP("branch-and-link-to-noredir t0\n"); + putIReg64(irsb, 1 /*x1/ra*/, mkU64(guest_pc_curr_instr + 20)); + putPC(irsb, getIReg64(5 /*x5/t0*/)); + dres->jk_StopHere = Ijk_NoRedir; + dres->whatNext = Dis_StopHere; + return True; + } else if (which == 0x00d6e6b3 /* or a3, a3, a3 */) { + /* IR injection */ + DIP("IR injection\n"); + vex_inject_ir(irsb, Iend_LE); + /* Invalidate the current insn. The reason is that the IRop we're + injecting here can change. In which case the translation has to + be redone. For ease of handling, we simply invalidate all the + time. */ + stmt(irsb, IRStmt_Put(OFFB_CMSTART, mkU64(guest_pc_curr_instr))); + stmt(irsb, IRStmt_Put(OFFB_CMLEN, mkU64(20))); + putPC(irsb, mkU64(guest_pc_curr_instr + 20)); + dres->whatNext = Dis_StopHere; + dres->jk_StopHere = Ijk_InvalICache; + return True; + } + /* We don't know what it is. */ + return False; + } + } + + /* Main riscv64 instruction decoder starts here. */ + Bool ok = False; + UInt inst_size; + + /* Parse insn[1:0] to determine whether the instruction is 16-bit + (compressed) or 32-bit. */ + switch (INSN(1, 0)) { + case 0b00: + case 0b01: + case 0b10: + dres->len = inst_size = 2; + ok = dis_RV64C(dres, irsb, insn, guest_pc_curr_instr, sigill_diag); + break; + + case 0b11: + dres->len = inst_size = 4; + ok = dis_RISCV64_standard(dres, irsb, insn, guest_pc_curr_instr, abiinfo, + sigill_diag); + break; + + default: + vassert(0); /* Can't happen. */ + } + + /* If the next-level down decoders failed, make sure dres didn't get + changed. 
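+      A decoder that rejects an instruction is expected to leave dres exactly
+      as it was initialised at the top of this function; a partially filled-in
+      result must not leak out for an insn that was never accepted.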
*/ + if (!ok) { + vassert(dres->whatNext == Dis_Continue); + vassert(dres->len == inst_size); + vassert(dres->jk_StopHere == Ijk_INVALID); + } + + return ok; +} + +#undef INSN + +/*------------------------------------------------------------*/ +/*--- Top-level fn ---*/ +/*------------------------------------------------------------*/ + +/* Disassemble a single instruction into IR. The instruction is located in host + memory at &guest_code[delta]. */ +DisResult disInstr_RISCV64(IRSB* irsb, + const UChar* guest_code, + Long delta, + Addr guest_IP, + VexArch guest_arch, + const VexArchInfo* archinfo, + const VexAbiInfo* abiinfo, + VexEndness host_endness, + Bool sigill_diag) +{ + DisResult dres; + vex_bzero(&dres, sizeof(dres)); + + vassert(guest_arch == VexArchRISCV64); + /* Check that the host is little-endian as getFReg32() and putFReg32() depend + on this fact. */ + vassert(host_endness == VexEndnessLE); + + /* Try to decode. */ + Bool ok = disInstr_RISCV64_WRK(&dres, irsb, &guest_code[delta], guest_IP, + archinfo, abiinfo, sigill_diag); + if (ok) { + /* All decode successes end up here. */ + vassert(dres.len == 2 || dres.len == 4 || dres.len == 20); + switch (dres.whatNext) { + case Dis_Continue: + putPC(irsb, mkU64(guest_IP + dres.len)); + break; + case Dis_StopHere: + break; + default: + vassert(0); + } + DIP("\n"); + } else { + /* All decode failures end up here. */ + if (sigill_diag) { + Int i, j; + UChar buf[64]; + UInt insn = getInsn(&guest_code[delta]); + vex_bzero(buf, sizeof(buf)); + for (i = j = 0; i < 32; i++) { + if (i > 0) { + if ((i & 7) == 0) + buf[j++] = ' '; + else if ((i & 3) == 0) + buf[j++] = '\''; + } + buf[j++] = (insn & (1 << (31 - i))) ? '1' : '0'; + } + vex_printf("disInstr(riscv64): unhandled instruction 0x%08x\n", insn); + vex_printf("disInstr(riscv64): %s\n", buf); + } + + /* Tell the dispatcher that this insn cannot be decoded, and so has not + been executed, and (is currently) the next to be executed. The pc + register should be up-to-date since it is made so at the start of each + insn, but nevertheless be paranoid and update it again right now. */ + putPC(irsb, mkU64(guest_IP)); + dres.len = 0; + dres.whatNext = Dis_StopHere; + dres.jk_StopHere = Ijk_NoDecode; + } + return dres; +} + +/*--------------------------------------------------------------------*/ +/*--- end guest_riscv64_toIR.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/priv/host_riscv64_defs.c b/priv/host_riscv64_defs.c new file mode 100644 index 000000000..61a1a0a83 --- /dev/null +++ b/priv/host_riscv64_defs.c @@ -0,0 +1,2701 @@ + +/*--------------------------------------------------------------------*/ +/*--- begin host_riscv64_defs.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2020-2023 Petr Pavlu + petr.pavlu@dagobah.cz + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, see . + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "libvex_trc_values.h" + +#include "host_riscv64_defs.h" +#include "main_util.h" + +/*------------------------------------------------------------*/ +/*--- Registers ---*/ +/*------------------------------------------------------------*/ + +UInt ppHRegRISCV64(HReg reg) +{ + static const HChar* inames[32] = { + "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", + "a1", "a2", "a3", "a4", "a5", "a6", "a7", "s2", "s3", "s4", "s5", + "s6", "s7", "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6"}; + + static const HChar* fnames[32] = { + "ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7", + "fs0", "fs1", "fa0", "fa1", "fa2", "fa3", "fa4", "fa5", + "fa6", "fa7", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7", + "fs8", "fs9", "fs10", "fs11", "ft8", "ft9", "ft10", "ft11"}; + + /* Be generic for all virtual regs. */ + if (hregIsVirtual(reg)) + return ppHReg(reg); + + /* Be specific for real regs. */ + switch (hregClass(reg)) { + case HRcInt64: { + UInt r = hregEncoding(reg); + vassert(r < 32); + return vex_printf("%s", inames[r]); + } + case HRcFlt64: { + UInt r = hregEncoding(reg); + vassert(r < 32); + return vex_printf("%s", fnames[r]); + } + default: + vpanic("ppHRegRISCV64"); + } +} + +static inline UInt iregEnc(HReg r) +{ + vassert(hregClass(r) == HRcInt64); + vassert(!hregIsVirtual(r)); + + UInt n = hregEncoding(r); + vassert(n < 32); + return n; +} + +static inline UInt fregEnc(HReg r) +{ + UInt n; + vassert(hregClass(r) == HRcFlt64); + vassert(!hregIsVirtual(r)); + n = hregEncoding(r); + vassert(n < 32); + return n; +} + +/*------------------------------------------------------------*/ +/*--- Instructions ---*/ +/*------------------------------------------------------------*/ + +static const HChar* showRISCV64CSR(UInt csr) +{ + switch (csr) { + case 0x001: + return "fflags"; + case 0x002: + return "frm"; + case 0x003: + return "fcsr"; + } + vpanic("showRISCV64CSR"); +} + +static const HChar* showRISCV64ALUOp(RISCV64ALUOp op) +{ + switch (op) { + case RISCV64op_ADD: + return "add"; + case RISCV64op_SUB: + return "sub"; + case RISCV64op_ADDW: + return "addw"; + case RISCV64op_SUBW: + return "subw"; + case RISCV64op_XOR: + return "xor"; + case RISCV64op_OR: + return "or"; + case RISCV64op_AND: + return "and"; + case RISCV64op_SLL: + return "sll"; + case RISCV64op_SRL: + return "srl"; + case RISCV64op_SRA: + return "sra"; + case RISCV64op_SLLW: + return "sllw"; + case RISCV64op_SRLW: + return "srlw"; + case RISCV64op_SRAW: + return "sraw"; + case RISCV64op_SLT: + return "slt"; + case RISCV64op_SLTU: + return "sltu"; + case RISCV64op_MUL: + return "mul"; + case RISCV64op_MULH: + return "mulh"; + case RISCV64op_MULHU: + return "mulhu"; + case RISCV64op_DIV: + return "div"; + case RISCV64op_DIVU: + return "divu"; + case RISCV64op_REM: + return "rem"; + case RISCV64op_REMU: + return "remu"; + case RISCV64op_MULW: + return "mulw"; + case RISCV64op_DIVW: + return "divw"; + case RISCV64op_DIVUW: + return "divuw"; + case RISCV64op_REMW: + return "remw"; + case RISCV64op_REMUW: + return "remuw"; + } + vpanic("showRISCV64ALUOp"); +} + +static const HChar* showRISCV64ALUImmOp(RISCV64ALUImmOp op) +{ + switch (op) { + case RISCV64op_ADDI: + return "addi"; + case RISCV64op_ADDIW: + return "addiw"; + case RISCV64op_XORI: + return "xori"; + case RISCV64op_ANDI: + return "andi"; + case RISCV64op_SLLI: + 
return "slli"; + case RISCV64op_SRLI: + return "srli"; + case RISCV64op_SRAI: + return "srai"; + case RISCV64op_SLTIU: + return "sltiu"; + } + vpanic("showRISCV64ALUImmOp"); +} + +static const HChar* showRISCV64LoadOp(RISCV64LoadOp op) +{ + switch (op) { + case RISCV64op_LD: + return "ld"; + case RISCV64op_LW: + return "lw"; + case RISCV64op_LH: + return "lh"; + case RISCV64op_LB: + return "lb"; + } + vpanic("showRISCV64LoadOp"); +} + +static const HChar* showRISCV64StoreOp(RISCV64StoreOp op) +{ + switch (op) { + case RISCV64op_SD: + return "sd"; + case RISCV64op_SW: + return "sw"; + case RISCV64op_SH: + return "sh"; + case RISCV64op_SB: + return "sb"; + } + vpanic("showRISCV64StoreOp"); +} + +static const HChar* showRISCV64LoadROp(RISCV64LoadROp op) +{ + switch (op) { + case RISCV64op_LR_W: + return "lr.w"; + } + vpanic("showRISCV64LoadROp"); +} + +static const HChar* showRISCV64StoreCOp(RISCV64StoreCOp op) +{ + switch (op) { + case RISCV64op_SC_W: + return "sc.w"; + } + vpanic("showRISCV64StoreCOp"); +} + +static const HChar* showRISCV64FpUnaryOp(RISCV64FpUnaryOp op) +{ + switch (op) { + case RISCV64op_FSQRT_S: + return "fsqrt.s"; + case RISCV64op_FSQRT_D: + return "fsqrt.d"; + } + vpanic("showRISCV64FpUnaryOp"); +} + +static const HChar* showRISCV64FpBinaryOp(RISCV64FpBinaryOp op) +{ + switch (op) { + case RISCV64op_FADD_S: + return "fadd.s"; + case RISCV64op_FMUL_S: + return "fmul.s"; + case RISCV64op_FDIV_S: + return "fdiv.s"; + case RISCV64op_FSGNJN_S: + return "fsgnjn.s"; + case RISCV64op_FSGNJX_S: + return "fsgnjx.s"; + case RISCV64op_FMIN_S: + return "fmin.s"; + case RISCV64op_FMAX_S: + return "fmax.s"; + case RISCV64op_FADD_D: + return "fadd.d"; + case RISCV64op_FSUB_D: + return "fsub.d"; + case RISCV64op_FMUL_D: + return "fmul.d"; + case RISCV64op_FDIV_D: + return "fdiv.d"; + case RISCV64op_FSGNJN_D: + return "fsgnjn.d"; + case RISCV64op_FSGNJX_D: + return "fsgnjx.d"; + case RISCV64op_FMIN_D: + return "fmin.d"; + case RISCV64op_FMAX_D: + return "fmax.d"; + } + vpanic("showRISCV64FpBinaryOp"); +} + +static const HChar* showRISCV64FpTernaryOp(RISCV64FpTernaryOp op) +{ + switch (op) { + case RISCV64op_FMADD_S: + return "fmadd.s"; + case RISCV64op_FMADD_D: + return "fmadd.d"; + } + vpanic("showRISCV64FpTernaryOp"); +} + +static const HChar* showRISCV64FpMoveOp(RISCV64FpMoveOp op) +{ + switch (op) { + case RISCV64op_FMV_X_W: + return "fmv.x.w"; + case RISCV64op_FMV_W_X: + return "fmv.w.x"; + case RISCV64op_FMV_D: + return "fmv.d"; + case RISCV64op_FMV_X_D: + return "fmv.x.d"; + case RISCV64op_FMV_D_X: + return "fmv.d.x"; + } + vpanic("showRISCV64FpMoveOp"); +} + +static const HChar* showRISCV64FpConvertOp(RISCV64FpConvertOp op) +{ + switch (op) { + case RISCV64op_FCVT_W_S: + return "fcvt.w.s"; + case RISCV64op_FCVT_WU_S: + return "fcvt.wu.s"; + case RISCV64op_FCVT_S_W: + return "fcvt.s.w"; + case RISCV64op_FCVT_S_WU: + return "fcvt.s.wu"; + case RISCV64op_FCVT_L_S: + return "fcvt.l.s"; + case RISCV64op_FCVT_LU_S: + return "fcvt.lu.s"; + case RISCV64op_FCVT_S_L: + return "fcvt.s.l"; + case RISCV64op_FCVT_S_LU: + return "fcvt.s.lu"; + case RISCV64op_FCVT_S_D: + return "fcvt.s.d"; + case RISCV64op_FCVT_D_S: + return "fcvt.d.s"; + case RISCV64op_FCVT_W_D: + return "fcvt.w.d"; + case RISCV64op_FCVT_WU_D: + return "fcvt.wu.d"; + case RISCV64op_FCVT_D_W: + return "fcvt.d.w"; + case RISCV64op_FCVT_D_WU: + return "fcvt.d.wu"; + case RISCV64op_FCVT_L_D: + return "fcvt.l.d"; + case RISCV64op_FCVT_LU_D: + return "fcvt.lu.d"; + case RISCV64op_FCVT_D_L: + return "fcvt.d.l"; + case 
RISCV64op_FCVT_D_LU: + return "fcvt.d.lu"; + } + vpanic("showRISCV64FpConvertOp"); +} + +static const HChar* showRISCV64FpCompareOp(RISCV64FpCompareOp op) +{ + switch (op) { + case RISCV64op_FEQ_S: + return "feq.s"; + case RISCV64op_FLT_S: + return "flt.s"; + case RISCV64op_FEQ_D: + return "feq.d"; + case RISCV64op_FLT_D: + return "flt.d"; + } + vpanic("showRISCV64FpCompareOp"); +} + +static const HChar* showRISCV64FpLdStOp(RISCV64FpLdStOp op) +{ + switch (op) { + case RISCV64op_FLW: + return "flw"; + case RISCV64op_FLD: + return "fld"; + case RISCV64op_FSW: + return "fsw"; + case RISCV64op_FSD: + return "fsd"; + } + vpanic("showRISCV64FpLdStOp"); +} + +RISCV64Instr* RISCV64Instr_LI(HReg dst, ULong imm64) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_LI; + i->RISCV64in.LI.dst = dst; + i->RISCV64in.LI.imm64 = imm64; + return i; +} + +RISCV64Instr* RISCV64Instr_MV(HReg dst, HReg src) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_MV; + i->RISCV64in.MV.dst = dst; + i->RISCV64in.MV.src = src; + return i; +} + +RISCV64Instr* RISCV64Instr_ALU(RISCV64ALUOp op, HReg dst, HReg src1, HReg src2) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_ALU; + i->RISCV64in.ALU.op = op; + i->RISCV64in.ALU.dst = dst; + i->RISCV64in.ALU.src1 = src1; + i->RISCV64in.ALU.src2 = src2; + return i; +} + +RISCV64Instr* +RISCV64Instr_ALUImm(RISCV64ALUImmOp op, HReg dst, HReg src, Int imm12) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_ALUImm; + i->RISCV64in.ALUImm.op = op; + i->RISCV64in.ALUImm.dst = dst; + i->RISCV64in.ALUImm.src = src; + i->RISCV64in.ALUImm.imm12 = imm12; + return i; +} + +RISCV64Instr* +RISCV64Instr_Load(RISCV64LoadOp op, HReg dst, HReg base, Int soff12) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_Load; + i->RISCV64in.Load.op = op; + i->RISCV64in.Load.dst = dst; + i->RISCV64in.Load.base = base; + i->RISCV64in.Load.soff12 = soff12; + return i; +} + +RISCV64Instr* +RISCV64Instr_Store(RISCV64StoreOp op, HReg src, HReg base, Int soff12) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_Store; + i->RISCV64in.Store.op = op; + i->RISCV64in.Store.src = src; + i->RISCV64in.Store.base = base; + i->RISCV64in.Store.soff12 = soff12; + return i; +} + +RISCV64Instr* RISCV64Instr_LoadR(RISCV64LoadROp op, HReg dst, HReg addr) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_LoadR; + i->RISCV64in.LoadR.op = op; + i->RISCV64in.LoadR.dst = dst; + i->RISCV64in.LoadR.addr = addr; + return i; +} + +RISCV64Instr* +RISCV64Instr_StoreC(RISCV64StoreCOp op, HReg res, HReg src, HReg addr) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_StoreC; + i->RISCV64in.StoreC.op = op; + i->RISCV64in.StoreC.res = res; + i->RISCV64in.StoreC.src = src; + i->RISCV64in.StoreC.addr = addr; + return i; +} + +RISCV64Instr* RISCV64Instr_CSRRW(HReg dst, HReg src, UInt csr) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_CSRRW; + i->RISCV64in.CSRRW.dst = dst; + i->RISCV64in.CSRRW.src = src; + i->RISCV64in.CSRRW.csr = csr; + return i; +} + +RISCV64Instr* RISCV64Instr_FpUnary(RISCV64FpUnaryOp op, HReg dst, HReg src) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_FpUnary; + i->RISCV64in.FpUnary.op = op; + i->RISCV64in.FpUnary.dst = dst; + i->RISCV64in.FpUnary.src = 
src; + return i; +} + +RISCV64Instr* +RISCV64Instr_FpBinary(RISCV64FpBinaryOp op, HReg dst, HReg src1, HReg src2) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_FpBinary; + i->RISCV64in.FpBinary.op = op; + i->RISCV64in.FpBinary.dst = dst; + i->RISCV64in.FpBinary.src1 = src1; + i->RISCV64in.FpBinary.src2 = src2; + return i; +} + +RISCV64Instr* RISCV64Instr_FpTernary( + RISCV64FpTernaryOp op, HReg dst, HReg src1, HReg src2, HReg src3) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_FpTernary; + i->RISCV64in.FpTernary.op = op; + i->RISCV64in.FpTernary.dst = dst; + i->RISCV64in.FpTernary.src1 = src1; + i->RISCV64in.FpTernary.src2 = src2; + i->RISCV64in.FpTernary.src3 = src3; + return i; +} + +RISCV64Instr* RISCV64Instr_FpMove(RISCV64FpMoveOp op, HReg dst, HReg src) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_FpMove; + i->RISCV64in.FpMove.op = op; + i->RISCV64in.FpMove.dst = dst; + i->RISCV64in.FpMove.src = src; + return i; +} + +RISCV64Instr* RISCV64Instr_FpConvert(RISCV64FpConvertOp op, HReg dst, HReg src) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_FpConvert; + i->RISCV64in.FpConvert.op = op; + i->RISCV64in.FpConvert.dst = dst; + i->RISCV64in.FpConvert.src = src; + return i; +} + +RISCV64Instr* +RISCV64Instr_FpCompare(RISCV64FpCompareOp op, HReg dst, HReg src1, HReg src2) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_FpCompare; + i->RISCV64in.FpCompare.op = op; + i->RISCV64in.FpCompare.dst = dst; + i->RISCV64in.FpCompare.src1 = src1; + i->RISCV64in.FpCompare.src2 = src2; + return i; +} + +RISCV64Instr* +RISCV64Instr_FpLdSt(RISCV64FpLdStOp op, HReg reg, HReg base, Int soff12) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_FpLdSt; + i->RISCV64in.FpLdSt.op = op; + i->RISCV64in.FpLdSt.reg = reg; + i->RISCV64in.FpLdSt.base = base; + i->RISCV64in.FpLdSt.soff12 = soff12; + return i; +} + +RISCV64Instr* +RISCV64Instr_CAS(RISCV64CASOp op, HReg old, HReg addr, HReg expd, HReg data) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_CAS; + i->RISCV64in.CAS.op = op; + i->RISCV64in.CAS.old = old; + i->RISCV64in.CAS.addr = addr; + i->RISCV64in.CAS.expd = expd; + i->RISCV64in.CAS.data = data; + return i; +} + +RISCV64Instr* RISCV64Instr_FENCE(void) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_FENCE; + return i; +} + +RISCV64Instr* RISCV64Instr_CSEL(HReg dst, HReg iftrue, HReg iffalse, HReg cond) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_CSEL; + i->RISCV64in.CSEL.dst = dst; + i->RISCV64in.CSEL.iftrue = iftrue; + i->RISCV64in.CSEL.iffalse = iffalse; + i->RISCV64in.CSEL.cond = cond; + return i; +} + +RISCV64Instr* RISCV64Instr_Call( + RetLoc rloc, Addr64 target, HReg cond, UChar nArgRegs, UChar nFArgRegs) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_Call; + i->RISCV64in.Call.rloc = rloc; + i->RISCV64in.Call.target = target; + i->RISCV64in.Call.cond = cond; + i->RISCV64in.Call.nArgRegs = nArgRegs; + i->RISCV64in.Call.nFArgRegs = nFArgRegs; + return i; +} + +RISCV64Instr* RISCV64Instr_XDirect( + Addr64 dstGA, HReg base, Int soff12, HReg cond, Bool toFastEP) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_XDirect; + i->RISCV64in.XDirect.dstGA = dstGA; + 
i->RISCV64in.XDirect.base = base; + i->RISCV64in.XDirect.soff12 = soff12; + i->RISCV64in.XDirect.cond = cond; + i->RISCV64in.XDirect.toFastEP = toFastEP; + return i; +} + +RISCV64Instr* RISCV64Instr_XIndir(HReg dstGA, HReg base, Int soff12, HReg cond) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_XIndir; + i->RISCV64in.XIndir.dstGA = dstGA; + i->RISCV64in.XIndir.base = base; + i->RISCV64in.XIndir.soff12 = soff12; + i->RISCV64in.XIndir.cond = cond; + return i; +} + +RISCV64Instr* RISCV64Instr_XAssisted( + HReg dstGA, HReg base, Int soff12, HReg cond, IRJumpKind jk) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_XAssisted; + i->RISCV64in.XAssisted.dstGA = dstGA; + i->RISCV64in.XAssisted.base = base; + i->RISCV64in.XAssisted.soff12 = soff12; + i->RISCV64in.XAssisted.cond = cond; + i->RISCV64in.XAssisted.jk = jk; + return i; +} + +RISCV64Instr* RISCV64Instr_EvCheck(HReg base_amCounter, + Int soff12_amCounter, + HReg base_amFailAddr, + Int soff12_amFailAddr) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_EvCheck; + i->RISCV64in.EvCheck.base_amCounter = base_amCounter; + i->RISCV64in.EvCheck.soff12_amCounter = soff12_amCounter; + i->RISCV64in.EvCheck.base_amFailAddr = base_amFailAddr; + i->RISCV64in.EvCheck.soff12_amFailAddr = soff12_amFailAddr; + return i; +} + +RISCV64Instr* RISCV64Instr_ProfInc(void) +{ + RISCV64Instr* i = LibVEX_Alloc_inline(sizeof(RISCV64Instr)); + i->tag = RISCV64in_ProfInc; + return i; +} + +void ppRISCV64Instr(const RISCV64Instr* i, Bool mode64) +{ + vassert(mode64 == True); + + switch (i->tag) { + case RISCV64in_LI: + vex_printf("li "); + ppHRegRISCV64(i->RISCV64in.LI.dst); + vex_printf(", 0x%llx", i->RISCV64in.LI.imm64); + return; + case RISCV64in_MV: + vex_printf("mv "); + ppHRegRISCV64(i->RISCV64in.MV.dst); + vex_printf(", "); + ppHRegRISCV64(i->RISCV64in.MV.src); + return; + case RISCV64in_ALU: + vex_printf("%-7s ", showRISCV64ALUOp(i->RISCV64in.ALU.op)); + ppHRegRISCV64(i->RISCV64in.ALU.dst); + vex_printf(", "); + ppHRegRISCV64(i->RISCV64in.ALU.src1); + vex_printf(", "); + ppHRegRISCV64(i->RISCV64in.ALU.src2); + return; + case RISCV64in_ALUImm: + vex_printf("%-7s ", showRISCV64ALUImmOp(i->RISCV64in.ALUImm.op)); + ppHRegRISCV64(i->RISCV64in.ALUImm.dst); + vex_printf(", "); + ppHRegRISCV64(i->RISCV64in.ALUImm.src); + vex_printf(", %d", i->RISCV64in.ALUImm.imm12); + return; + case RISCV64in_Load: + vex_printf("%-7s ", showRISCV64LoadOp(i->RISCV64in.Load.op)); + ppHRegRISCV64(i->RISCV64in.Load.dst); + vex_printf(", %d(", i->RISCV64in.Load.soff12); + ppHRegRISCV64(i->RISCV64in.Load.base); + vex_printf(")"); + return; + case RISCV64in_Store: + vex_printf("%-7s ", showRISCV64StoreOp(i->RISCV64in.Store.op)); + ppHRegRISCV64(i->RISCV64in.Store.src); + vex_printf(", %d(", i->RISCV64in.Store.soff12); + ppHRegRISCV64(i->RISCV64in.Store.base); + vex_printf(")"); + return; + case RISCV64in_LoadR: + vex_printf("%-7s ", showRISCV64LoadROp(i->RISCV64in.LoadR.op)); + ppHRegRISCV64(i->RISCV64in.LoadR.dst); + vex_printf(", ("); + ppHRegRISCV64(i->RISCV64in.LoadR.addr); + vex_printf(")"); + return; + case RISCV64in_StoreC: + vex_printf("%-7s ", showRISCV64StoreCOp(i->RISCV64in.StoreC.op)); + ppHRegRISCV64(i->RISCV64in.StoreC.res); + vex_printf(", "); + ppHRegRISCV64(i->RISCV64in.StoreC.src); + vex_printf(", ("); + ppHRegRISCV64(i->RISCV64in.StoreC.addr); + vex_printf(")"); + return; + case RISCV64in_CSRRW: + vex_printf("csrrw "); + 
ppHRegRISCV64(i->RISCV64in.CSRRW.dst); + vex_printf(", %s, ", showRISCV64CSR(i->RISCV64in.CSRRW.csr)); + ppHRegRISCV64(i->RISCV64in.CSRRW.src); + return; + case RISCV64in_FpUnary: + vex_printf("%-7s ", showRISCV64FpUnaryOp(i->RISCV64in.FpUnary.op)); + ppHRegRISCV64(i->RISCV64in.FpUnary.dst); + vex_printf(", "); + ppHRegRISCV64(i->RISCV64in.FpUnary.src); + return; + case RISCV64in_FpBinary: + vex_printf("%-7s ", showRISCV64FpBinaryOp(i->RISCV64in.FpBinary.op)); + ppHRegRISCV64(i->RISCV64in.FpBinary.dst); + vex_printf(", "); + ppHRegRISCV64(i->RISCV64in.FpBinary.src1); + vex_printf(", "); + ppHRegRISCV64(i->RISCV64in.FpBinary.src2); + return; + case RISCV64in_FpTernary: + vex_printf("%-7s ", showRISCV64FpTernaryOp(i->RISCV64in.FpTernary.op)); + ppHRegRISCV64(i->RISCV64in.FpTernary.dst); + vex_printf(", "); + ppHRegRISCV64(i->RISCV64in.FpTernary.src1); + vex_printf(", "); + ppHRegRISCV64(i->RISCV64in.FpTernary.src2); + vex_printf(", "); + ppHRegRISCV64(i->RISCV64in.FpTernary.src3); + return; + case RISCV64in_FpMove: + vex_printf("%-7s ", showRISCV64FpMoveOp(i->RISCV64in.FpMove.op)); + ppHRegRISCV64(i->RISCV64in.FpMove.dst); + vex_printf(", "); + ppHRegRISCV64(i->RISCV64in.FpMove.src); + return; + case RISCV64in_FpConvert: + vex_printf("%-7s ", showRISCV64FpConvertOp(i->RISCV64in.FpConvert.op)); + ppHRegRISCV64(i->RISCV64in.FpConvert.dst); + vex_printf(", "); + ppHRegRISCV64(i->RISCV64in.FpConvert.src); + return; + case RISCV64in_FpCompare: + vex_printf("%-7s ", showRISCV64FpCompareOp(i->RISCV64in.FpCompare.op)); + ppHRegRISCV64(i->RISCV64in.FpCompare.dst); + vex_printf(", "); + ppHRegRISCV64(i->RISCV64in.FpCompare.src1); + vex_printf(", "); + ppHRegRISCV64(i->RISCV64in.FpCompare.src2); + return; + case RISCV64in_FpLdSt: + vex_printf("%-7s ", showRISCV64FpLdStOp(i->RISCV64in.FpLdSt.op)); + ppHRegRISCV64(i->RISCV64in.FpLdSt.reg); + vex_printf(", %d(", i->RISCV64in.FpLdSt.soff12); + ppHRegRISCV64(i->RISCV64in.FpLdSt.base); + vex_printf(")"); + return; + case RISCV64in_CAS: { + vassert(i->RISCV64in.CAS.op == RISCV64op_CAS_D || + i->RISCV64in.CAS.op == RISCV64op_CAS_W); + Bool is_d = i->RISCV64in.CAS.op == RISCV64op_CAS_D; + vex_printf("(%s) 1: %s ", is_d ? "CAS_D" : "CAS_W", + is_d ? "lr.d" : "lr.w"); + ppHRegRISCV64(i->RISCV64in.CAS.old); + vex_printf(", ("); + ppHRegRISCV64(i->RISCV64in.CAS.addr); + vex_printf("); bne "); + ppHRegRISCV64(i->RISCV64in.CAS.old); + vex_printf(", "); + ppHRegRISCV64(i->RISCV64in.CAS.expd); + vex_printf(", 2f; %s t0, ", is_d ? 
"sc.d" : "sc.w"); + ppHRegRISCV64(i->RISCV64in.CAS.data); + vex_printf(", ("); + ppHRegRISCV64(i->RISCV64in.CAS.addr); + vex_printf("); bne t0, zero, 1b; 2:"); + return; + } + case RISCV64in_FENCE: + vex_printf("fence"); + return; + case RISCV64in_CSEL: + vex_printf("(CSEL) beq "); + ppHRegRISCV64(i->RISCV64in.CSEL.cond); + vex_printf(", zero, 1f; c.mv "); + ppHRegRISCV64(i->RISCV64in.CSEL.dst); + vex_printf(", "); + ppHRegRISCV64(i->RISCV64in.CSEL.iftrue); + vex_printf("; c.j 2f; 1: c.mv "); + ppHRegRISCV64(i->RISCV64in.CSEL.dst); + vex_printf(", "); + ppHRegRISCV64(i->RISCV64in.CSEL.iffalse); + vex_printf("; 2:"); + return; + case RISCV64in_Call: + vex_printf("(Call) "); + if (!hregIsInvalid(i->RISCV64in.Call.cond)) { + vex_printf("beq "); + ppHRegRISCV64(i->RISCV64in.Call.cond); + vex_printf(", zero, 1f; "); + } + vex_printf("li t0, 0x%llx; c.jalr 0(t0) [nArgRegs=%u, nFArgRegs=%u, ", + i->RISCV64in.Call.target, i->RISCV64in.Call.nArgRegs, + i->RISCV64in.Call.nFArgRegs); + ppRetLoc(i->RISCV64in.Call.rloc); + vex_printf("]; 1:"); + return; + case RISCV64in_XDirect: + vex_printf("(xDirect) "); + if (!hregIsInvalid(i->RISCV64in.XDirect.cond)) { + vex_printf("beq "); + ppHRegRISCV64(i->RISCV64in.XDirect.cond); + vex_printf(", zero, 1f; "); + } + vex_printf("li t0, 0x%llx; sd t0, %d(", i->RISCV64in.XDirect.dstGA, + i->RISCV64in.XDirect.soff12); + ppHRegRISCV64(i->RISCV64in.XDirect.base); + vex_printf("); li t0, <%s>; c.jalr 0(t0); 1:", + i->RISCV64in.XDirect.toFastEP ? "disp_cp_chain_me_to_fastEP" + : "disp_cp_chain_me_to_slowEP"); + return; + case RISCV64in_XIndir: + vex_printf("(xIndir) "); + if (!hregIsInvalid(i->RISCV64in.XIndir.cond)) { + vex_printf("beq "); + ppHRegRISCV64(i->RISCV64in.XIndir.cond); + vex_printf(", zero, 1f; "); + } + vex_printf("sd "); + ppHRegRISCV64(i->RISCV64in.XIndir.dstGA); + vex_printf(", %d(", i->RISCV64in.XIndir.soff12); + ppHRegRISCV64(i->RISCV64in.XIndir.base); + vex_printf("); li t0, ; c.jr 0(t0); 1:"); + return; + case RISCV64in_XAssisted: + vex_printf("(xAssisted) "); + if (!hregIsInvalid(i->RISCV64in.XAssisted.cond)) { + vex_printf("beq "); + ppHRegRISCV64(i->RISCV64in.XAssisted.cond); + vex_printf(", zero, 1f; "); + } + vex_printf("sd "); + ppHRegRISCV64(i->RISCV64in.XAssisted.dstGA); + vex_printf(", %d(", i->RISCV64in.XAssisted.soff12); + ppHRegRISCV64(i->RISCV64in.XAssisted.base); + vex_printf("); mv s0, $IRJumpKind_to_TRCVAL(%d)", + (Int)i->RISCV64in.XAssisted.jk); + vex_printf("; li t0, ; c.jr 0(t0); 1:"); + return; + case RISCV64in_EvCheck: + vex_printf("(evCheck) lw t0, %d(", i->RISCV64in.EvCheck.soff12_amCounter); + ppHRegRISCV64(i->RISCV64in.EvCheck.base_amCounter); + vex_printf("); c.addiw t0, -1; sw t0, %d(", + i->RISCV64in.EvCheck.soff12_amCounter); + ppHRegRISCV64(i->RISCV64in.EvCheck.base_amCounter); + vex_printf("); bge t0, zero, 1f; ld t0, %d(", + i->RISCV64in.EvCheck.soff12_amFailAddr); + ppHRegRISCV64(i->RISCV64in.EvCheck.base_amFailAddr); + vex_printf("); c.jr 0(t0); 1:"); + return; + case RISCV64in_ProfInc: + vex_printf("(profInc) li t1, $NotKnownYet; " + "ld t0, 0(t1); c.addi t0, t0, 1; sd t0, 0(t1)"); + return; + default: + vpanic("ppRISCV64Instr"); + } +} + +/*------------------------------------------------------------*/ +/*--- Helpers for register allocation ---*/ +/*------------------------------------------------------------*/ + +/* Initialise and return the "register universe", i.e. a list of all hardware + registers. Called once. 
*/ +const RRegUniverse* getRRegUniverse_RISCV64(void) +{ + static RRegUniverse all_regs; + static Bool initialised = False; + RRegUniverse* ru = &all_regs; + + if (LIKELY(initialised)) + return ru; + + RRegUniverse__init(ru); + + /* Add the registers that are available to the register allocator. */ + /* TODO */ + ru->allocable_start[HRcInt64] = ru->size; + ru->regs[ru->size++] = hregRISCV64_x18(); /* s2 */ + ru->regs[ru->size++] = hregRISCV64_x19(); /* s3 */ + ru->regs[ru->size++] = hregRISCV64_x20(); /* s4 */ + ru->regs[ru->size++] = hregRISCV64_x21(); /* s5 */ + ru->regs[ru->size++] = hregRISCV64_x22(); /* s6 */ + ru->regs[ru->size++] = hregRISCV64_x23(); /* s7 */ + ru->regs[ru->size++] = hregRISCV64_x24(); /* s8 */ + ru->regs[ru->size++] = hregRISCV64_x25(); /* s9 */ + ru->regs[ru->size++] = hregRISCV64_x26(); /* s10 */ + ru->regs[ru->size++] = hregRISCV64_x27(); /* s11 */ + ru->regs[ru->size++] = hregRISCV64_x10(); /* a0 */ + ru->regs[ru->size++] = hregRISCV64_x11(); /* a1 */ + ru->regs[ru->size++] = hregRISCV64_x12(); /* a2 */ + ru->regs[ru->size++] = hregRISCV64_x13(); /* a3 */ + ru->regs[ru->size++] = hregRISCV64_x14(); /* a4 */ + ru->regs[ru->size++] = hregRISCV64_x15(); /* a5 */ + ru->regs[ru->size++] = hregRISCV64_x16(); /* a6 */ + ru->regs[ru->size++] = hregRISCV64_x17(); /* a7 */ + ru->allocable_end[HRcInt64] = ru->size - 1; + + /* Floating-point registers, all of which are caller-saved. */ + ru->allocable_start[HRcFlt64] = ru->size; + ru->regs[ru->size++] = hregRISCV64_f0(); /* ft0 */ + ru->regs[ru->size++] = hregRISCV64_f1(); /* ft1 */ + ru->regs[ru->size++] = hregRISCV64_f2(); /* ft2 */ + ru->regs[ru->size++] = hregRISCV64_f3(); /* ft3 */ + ru->regs[ru->size++] = hregRISCV64_f4(); /* ft4 */ + ru->regs[ru->size++] = hregRISCV64_f5(); /* ft5 */ + ru->regs[ru->size++] = hregRISCV64_f6(); /* ft6 */ + ru->regs[ru->size++] = hregRISCV64_f7(); /* ft7 */ + ru->regs[ru->size++] = hregRISCV64_f10(); /* fa0 */ + ru->regs[ru->size++] = hregRISCV64_f11(); /* fa1 */ + ru->regs[ru->size++] = hregRISCV64_f12(); /* fa2 */ + ru->regs[ru->size++] = hregRISCV64_f13(); /* fa3 */ + ru->regs[ru->size++] = hregRISCV64_f14(); /* fa4 */ + ru->regs[ru->size++] = hregRISCV64_f15(); /* fa5 */ + ru->regs[ru->size++] = hregRISCV64_f16(); /* fa6 */ + ru->regs[ru->size++] = hregRISCV64_f17(); /* fa7 */ + ru->regs[ru->size++] = hregRISCV64_f28(); /* ft8 */ + ru->regs[ru->size++] = hregRISCV64_f29(); /* ft9 */ + ru->regs[ru->size++] = hregRISCV64_f30(); /* ft10 */ + ru->regs[ru->size++] = hregRISCV64_f31(); /* ft11 */ + ru->allocable_end[HRcFlt64] = ru->size - 1; + ru->allocable = ru->size; + + /* Add the registers that are not available for allocation. */ + /* TODO */ + ru->regs[ru->size++] = hregRISCV64_x0(); /* zero */ + ru->regs[ru->size++] = hregRISCV64_x2(); /* sp */ + ru->regs[ru->size++] = hregRISCV64_x8(); /* s0 */ + + initialised = True; + + RRegUniverse__check_is_sane(ru); + return ru; +} + +/* Tell the register allocator how the given instruction uses the registers it + refers to. 
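+   Every register operand is declared as either HRmRead or HRmWrite so that
+   the allocator can compute accurate live ranges for virtual registers.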
*/ +void getRegUsage_RISCV64Instr(HRegUsage* u, const RISCV64Instr* i, Bool mode64) +{ + vassert(mode64 == True); + + initHRegUsage(u); + switch (i->tag) { + case RISCV64in_LI: + addHRegUse(u, HRmWrite, i->RISCV64in.LI.dst); + return; + case RISCV64in_MV: + addHRegUse(u, HRmWrite, i->RISCV64in.MV.dst); + addHRegUse(u, HRmRead, i->RISCV64in.MV.src); + return; + case RISCV64in_ALU: + addHRegUse(u, HRmWrite, i->RISCV64in.ALU.dst); + addHRegUse(u, HRmRead, i->RISCV64in.ALU.src1); + addHRegUse(u, HRmRead, i->RISCV64in.ALU.src2); + return; + case RISCV64in_ALUImm: + addHRegUse(u, HRmWrite, i->RISCV64in.ALUImm.dst); + addHRegUse(u, HRmRead, i->RISCV64in.ALUImm.src); + return; + case RISCV64in_Load: + addHRegUse(u, HRmWrite, i->RISCV64in.Load.dst); + addHRegUse(u, HRmRead, i->RISCV64in.Load.base); + return; + case RISCV64in_Store: + addHRegUse(u, HRmRead, i->RISCV64in.Store.src); + addHRegUse(u, HRmRead, i->RISCV64in.Store.base); + return; + case RISCV64in_LoadR: + addHRegUse(u, HRmWrite, i->RISCV64in.LoadR.dst); + addHRegUse(u, HRmRead, i->RISCV64in.LoadR.addr); + return; + case RISCV64in_StoreC: + addHRegUse(u, HRmWrite, i->RISCV64in.StoreC.res); + addHRegUse(u, HRmRead, i->RISCV64in.StoreC.src); + addHRegUse(u, HRmRead, i->RISCV64in.StoreC.addr); + return; + case RISCV64in_CSRRW: + addHRegUse(u, HRmWrite, i->RISCV64in.CSRRW.dst); + addHRegUse(u, HRmRead, i->RISCV64in.CSRRW.src); + return; + case RISCV64in_FpUnary: + addHRegUse(u, HRmWrite, i->RISCV64in.FpUnary.dst); + addHRegUse(u, HRmRead, i->RISCV64in.FpUnary.src); + return; + case RISCV64in_FpBinary: + addHRegUse(u, HRmWrite, i->RISCV64in.FpBinary.dst); + addHRegUse(u, HRmRead, i->RISCV64in.FpBinary.src1); + addHRegUse(u, HRmRead, i->RISCV64in.FpBinary.src2); + return; + case RISCV64in_FpTernary: + addHRegUse(u, HRmWrite, i->RISCV64in.FpTernary.dst); + addHRegUse(u, HRmRead, i->RISCV64in.FpTernary.src1); + addHRegUse(u, HRmRead, i->RISCV64in.FpTernary.src2); + addHRegUse(u, HRmRead, i->RISCV64in.FpTernary.src3); + return; + case RISCV64in_FpMove: + addHRegUse(u, HRmWrite, i->RISCV64in.FpMove.dst); + addHRegUse(u, HRmRead, i->RISCV64in.FpMove.src); + return; + case RISCV64in_FpConvert: + addHRegUse(u, HRmWrite, i->RISCV64in.FpConvert.dst); + addHRegUse(u, HRmRead, i->RISCV64in.FpConvert.src); + return; + case RISCV64in_FpCompare: + addHRegUse(u, HRmWrite, i->RISCV64in.FpCompare.dst); + addHRegUse(u, HRmRead, i->RISCV64in.FpCompare.src1); + addHRegUse(u, HRmRead, i->RISCV64in.FpCompare.src2); + return; + case RISCV64in_FpLdSt: + switch (i->RISCV64in.FpLdSt.op) { + case RISCV64op_FLW: + case RISCV64op_FLD: + addHRegUse(u, HRmWrite, i->RISCV64in.FpLdSt.reg); + break; + case RISCV64op_FSW: + case RISCV64op_FSD: + addHRegUse(u, HRmRead, i->RISCV64in.FpLdSt.reg); + break; + } + addHRegUse(u, HRmRead, i->RISCV64in.FpLdSt.base); + return; + case RISCV64in_CAS: + addHRegUse(u, HRmWrite, i->RISCV64in.CAS.old); + addHRegUse(u, HRmRead, i->RISCV64in.CAS.addr); + addHRegUse(u, HRmRead, i->RISCV64in.CAS.expd); + addHRegUse(u, HRmRead, i->RISCV64in.CAS.data); + return; + case RISCV64in_FENCE: + return; + case RISCV64in_CSEL: + addHRegUse(u, HRmWrite, i->RISCV64in.CSEL.dst); + addHRegUse(u, HRmRead, i->RISCV64in.CSEL.iftrue); + addHRegUse(u, HRmRead, i->RISCV64in.CSEL.iffalse); + addHRegUse(u, HRmRead, i->RISCV64in.CSEL.cond); + return; + case RISCV64in_Call: + /* Logic and comments copied/modified from the arm64 backend. */ + /* First off, claim it trashes all the caller-saved registers which fall + within the register allocator's jurisdiction. 
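+         For the RISC-V calling convention the caller-saved registers include
+         a0..a7 and the temporaries on the integer side, and fa0..fa7 plus
+         ft0..ft11 on the FP side; only those present in the allocatable
+         universe need to be, and are, listed below.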
*/ + addHRegUse(u, HRmWrite, hregRISCV64_x10()); + addHRegUse(u, HRmWrite, hregRISCV64_x11()); + addHRegUse(u, HRmWrite, hregRISCV64_x12()); + addHRegUse(u, HRmWrite, hregRISCV64_x13()); + addHRegUse(u, HRmWrite, hregRISCV64_x14()); + addHRegUse(u, HRmWrite, hregRISCV64_x15()); + addHRegUse(u, HRmWrite, hregRISCV64_x16()); + addHRegUse(u, HRmWrite, hregRISCV64_x17()); + addHRegUse(u, HRmWrite, hregRISCV64_f0()); + addHRegUse(u, HRmWrite, hregRISCV64_f1()); + addHRegUse(u, HRmWrite, hregRISCV64_f2()); + addHRegUse(u, HRmWrite, hregRISCV64_f3()); + addHRegUse(u, HRmWrite, hregRISCV64_f4()); + addHRegUse(u, HRmWrite, hregRISCV64_f5()); + addHRegUse(u, HRmWrite, hregRISCV64_f6()); + addHRegUse(u, HRmWrite, hregRISCV64_f7()); + addHRegUse(u, HRmWrite, hregRISCV64_f10()); + addHRegUse(u, HRmWrite, hregRISCV64_f11()); + addHRegUse(u, HRmWrite, hregRISCV64_f12()); + addHRegUse(u, HRmWrite, hregRISCV64_f13()); + addHRegUse(u, HRmWrite, hregRISCV64_f14()); + addHRegUse(u, HRmWrite, hregRISCV64_f15()); + addHRegUse(u, HRmWrite, hregRISCV64_f16()); + addHRegUse(u, HRmWrite, hregRISCV64_f17()); + addHRegUse(u, HRmWrite, hregRISCV64_f28()); + addHRegUse(u, HRmWrite, hregRISCV64_f29()); + addHRegUse(u, HRmWrite, hregRISCV64_f30()); + addHRegUse(u, HRmWrite, hregRISCV64_f31()); + /* Now we have to state any parameter-carrying registers which might be + read. This depends on nArgRegs and nFArgRegs. */ + switch (i->RISCV64in.Call.nArgRegs) { + case 8: + addHRegUse(u, HRmRead, hregRISCV64_x17()); /*fallthru*/ + case 7: + addHRegUse(u, HRmRead, hregRISCV64_x16()); /*fallthru*/ + case 6: + addHRegUse(u, HRmRead, hregRISCV64_x15()); /*fallthru*/ + case 5: + addHRegUse(u, HRmRead, hregRISCV64_x14()); /*fallthru*/ + case 4: + addHRegUse(u, HRmRead, hregRISCV64_x13()); /*fallthru*/ + case 3: + addHRegUse(u, HRmRead, hregRISCV64_x12()); /*fallthru*/ + case 2: + addHRegUse(u, HRmRead, hregRISCV64_x11()); /*fallthru*/ + case 1: + addHRegUse(u, HRmRead, hregRISCV64_x10()); + break; + case 0: + break; + default: + vpanic("getRegUsage_RISCV64Instr:Call:regparms"); + } + switch (i->RISCV64in.Call.nFArgRegs) { + case 8: + addHRegUse(u, HRmRead, hregRISCV64_f17()); /*fallthru*/ + case 7: + addHRegUse(u, HRmRead, hregRISCV64_f16()); /*fallthru*/ + case 6: + addHRegUse(u, HRmRead, hregRISCV64_f15()); /*fallthru*/ + case 5: + addHRegUse(u, HRmRead, hregRISCV64_f14()); /*fallthru*/ + case 4: + addHRegUse(u, HRmRead, hregRISCV64_f13()); /*fallthru*/ + case 3: + addHRegUse(u, HRmRead, hregRISCV64_f12()); /*fallthru*/ + case 2: + addHRegUse(u, HRmRead, hregRISCV64_f11()); /*fallthru*/ + case 1: + addHRegUse(u, HRmRead, hregRISCV64_f10()); + break; + case 0: + break; + default: + vpanic("getRegUsage_RISCV64Instr:Call:fregparms"); + } + /* Finally, add the condition register. */ + if (!hregIsInvalid(i->RISCV64in.Call.cond)) + addHRegUse(u, HRmRead, i->RISCV64in.Call.cond); + return; + /* XDirect/XIndir/XAssisted are also a bit subtle. They conditionally exit + the block. Hence we only need to list (1) the registers that they read, + and (2) the registers that they write in the case where the block is not + exited. (2) is empty, hence only (1) is relevant here. 
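+      The x5/t0 scratch register which the generated exit sequences clobber is
+      not in the allocatable universe and therefore does not need to be
+      mentioned.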
*/ + case RISCV64in_XDirect: + addHRegUse(u, HRmRead, i->RISCV64in.XDirect.base); + if (!hregIsInvalid(i->RISCV64in.XDirect.cond)) + addHRegUse(u, HRmRead, i->RISCV64in.XDirect.cond); + return; + case RISCV64in_XIndir: + addHRegUse(u, HRmRead, i->RISCV64in.XIndir.dstGA); + addHRegUse(u, HRmRead, i->RISCV64in.XIndir.base); + if (!hregIsInvalid(i->RISCV64in.XIndir.cond)) + addHRegUse(u, HRmRead, i->RISCV64in.XIndir.cond); + return; + case RISCV64in_XAssisted: + addHRegUse(u, HRmRead, i->RISCV64in.XAssisted.dstGA); + addHRegUse(u, HRmRead, i->RISCV64in.XAssisted.base); + if (!hregIsInvalid(i->RISCV64in.XAssisted.cond)) + addHRegUse(u, HRmRead, i->RISCV64in.XAssisted.cond); + return; + case RISCV64in_EvCheck: + /* We expect both amodes only to mention x8/s0, so this is in fact + pointless, since the register isn't allocatable, but anyway.. */ + addHRegUse(u, HRmRead, i->RISCV64in.EvCheck.base_amCounter); + addHRegUse(u, HRmRead, i->RISCV64in.EvCheck.base_amFailAddr); + return; + case RISCV64in_ProfInc: + /* Does not use any registers known to RA. */ + return; + default: + ppRISCV64Instr(i, mode64); + vpanic("getRegUsage_RISCV64Instr"); + } +} + +/* Local helper. */ +static void mapReg(HRegRemap* m, HReg* r) { *r = lookupHRegRemap(m, *r); } + +/* Map the registers of the given instruction. */ +void mapRegs_RISCV64Instr(HRegRemap* m, RISCV64Instr* i, Bool mode64) +{ + vassert(mode64 == True); + + switch (i->tag) { + case RISCV64in_LI: + mapReg(m, &i->RISCV64in.LI.dst); + return; + case RISCV64in_MV: + mapReg(m, &i->RISCV64in.MV.dst); + mapReg(m, &i->RISCV64in.MV.src); + return; + case RISCV64in_ALU: + mapReg(m, &i->RISCV64in.ALU.dst); + mapReg(m, &i->RISCV64in.ALU.src1); + mapReg(m, &i->RISCV64in.ALU.src2); + return; + case RISCV64in_ALUImm: + mapReg(m, &i->RISCV64in.ALUImm.dst); + mapReg(m, &i->RISCV64in.ALUImm.src); + return; + case RISCV64in_Load: + mapReg(m, &i->RISCV64in.Load.dst); + mapReg(m, &i->RISCV64in.Load.base); + return; + case RISCV64in_Store: + mapReg(m, &i->RISCV64in.Store.src); + mapReg(m, &i->RISCV64in.Store.base); + return; + case RISCV64in_LoadR: + mapReg(m, &i->RISCV64in.LoadR.dst); + mapReg(m, &i->RISCV64in.LoadR.addr); + return; + case RISCV64in_StoreC: + mapReg(m, &i->RISCV64in.StoreC.res); + mapReg(m, &i->RISCV64in.StoreC.src); + mapReg(m, &i->RISCV64in.StoreC.addr); + return; + case RISCV64in_CSRRW: + mapReg(m, &i->RISCV64in.CSRRW.dst); + mapReg(m, &i->RISCV64in.CSRRW.src); + return; + case RISCV64in_FpUnary: + mapReg(m, &i->RISCV64in.FpUnary.dst); + mapReg(m, &i->RISCV64in.FpUnary.src); + return; + case RISCV64in_FpBinary: + mapReg(m, &i->RISCV64in.FpBinary.dst); + mapReg(m, &i->RISCV64in.FpBinary.src1); + mapReg(m, &i->RISCV64in.FpBinary.src2); + return; + case RISCV64in_FpTernary: + mapReg(m, &i->RISCV64in.FpTernary.dst); + mapReg(m, &i->RISCV64in.FpTernary.src1); + mapReg(m, &i->RISCV64in.FpTernary.src2); + mapReg(m, &i->RISCV64in.FpTernary.src3); + return; + case RISCV64in_FpMove: + mapReg(m, &i->RISCV64in.FpMove.dst); + mapReg(m, &i->RISCV64in.FpMove.src); + return; + case RISCV64in_FpConvert: + mapReg(m, &i->RISCV64in.FpConvert.dst); + mapReg(m, &i->RISCV64in.FpConvert.src); + return; + case RISCV64in_FpCompare: + mapReg(m, &i->RISCV64in.FpCompare.dst); + mapReg(m, &i->RISCV64in.FpCompare.src1); + mapReg(m, &i->RISCV64in.FpCompare.src2); + return; + case RISCV64in_FpLdSt: + mapReg(m, &i->RISCV64in.FpLdSt.reg); + mapReg(m, &i->RISCV64in.FpLdSt.base); + return; + case RISCV64in_CAS: + mapReg(m, &i->RISCV64in.CAS.old); + mapReg(m, &i->RISCV64in.CAS.addr); + 
mapReg(m, &i->RISCV64in.CAS.expd); + mapReg(m, &i->RISCV64in.CAS.data); + return; + case RISCV64in_FENCE: + return; + case RISCV64in_CSEL: + mapReg(m, &i->RISCV64in.CSEL.dst); + mapReg(m, &i->RISCV64in.CSEL.iftrue); + mapReg(m, &i->RISCV64in.CSEL.iffalse); + mapReg(m, &i->RISCV64in.CSEL.cond); + return; + case RISCV64in_Call: + if (!hregIsInvalid(i->RISCV64in.Call.cond)) + mapReg(m, &i->RISCV64in.Call.cond); + return; + case RISCV64in_XDirect: + mapReg(m, &i->RISCV64in.XDirect.base); + if (!hregIsInvalid(i->RISCV64in.XDirect.cond)) + mapReg(m, &i->RISCV64in.XDirect.cond); + return; + case RISCV64in_XIndir: + mapReg(m, &i->RISCV64in.XIndir.dstGA); + mapReg(m, &i->RISCV64in.XIndir.base); + if (!hregIsInvalid(i->RISCV64in.XIndir.cond)) + mapReg(m, &i->RISCV64in.XIndir.cond); + return; + case RISCV64in_XAssisted: + mapReg(m, &i->RISCV64in.XAssisted.dstGA); + mapReg(m, &i->RISCV64in.XAssisted.base); + if (!hregIsInvalid(i->RISCV64in.XAssisted.cond)) + mapReg(m, &i->RISCV64in.XAssisted.cond); + return; + case RISCV64in_EvCheck: + /* We expect both amodes only to mention x8/s0, so this is in fact + pointless, since the register isn't allocatable, but anyway.. */ + mapReg(m, &i->RISCV64in.EvCheck.base_amCounter); + mapReg(m, &i->RISCV64in.EvCheck.base_amFailAddr); + return; + case RISCV64in_ProfInc: + /* Hardwires x5/t0 and x6/t1 -- nothing to modify. */ + return; + default: + ppRISCV64Instr(i, mode64); + vpanic("mapRegs_RISCV64Instr"); + } +} + +/* Generate riscv64 spill/reload instructions under the direction of the + register allocator. Note it's critical these don't write the condition + codes. */ +void genSpill_RISCV64(/*OUT*/ HInstr** i1, + /*OUT*/ HInstr** i2, + HReg rreg, + Int offsetB, + Bool mode64) +{ + vassert(offsetB >= 0); + vassert(!hregIsVirtual(rreg)); + vassert(mode64 == True); + + HReg base = get_baseblock_register(); + Int soff12 = offsetB - BASEBLOCK_OFFSET_ADJUSTMENT; + vassert(soff12 >= -2048 && soff12 < 2048); + + HRegClass rclass = hregClass(rreg); + switch (rclass) { + case HRcInt64: + *i1 = RISCV64Instr_Store(RISCV64op_SD, rreg, base, soff12); + return; + case HRcFlt64: + *i1 = RISCV64Instr_FpLdSt(RISCV64op_FSD, rreg, base, soff12); + return; + default: + ppHRegClass(rclass); + vpanic("genSpill_RISCV64: unimplemented regclass"); + } +} + +void genReload_RISCV64(/*OUT*/ HInstr** i1, + /*OUT*/ HInstr** i2, + HReg rreg, + Int offsetB, + Bool mode64) +{ + vassert(offsetB >= 0); + vassert(!hregIsVirtual(rreg)); + vassert(mode64 == True); + + HReg base = get_baseblock_register(); + Int soff12 = offsetB - BASEBLOCK_OFFSET_ADJUSTMENT; + vassert(soff12 >= -2048 && soff12 < 2048); + + HRegClass rclass = hregClass(rreg); + switch (rclass) { + case HRcInt64: + *i1 = RISCV64Instr_Load(RISCV64op_LD, rreg, base, soff12); + return; + case HRcFlt64: + *i1 = RISCV64Instr_FpLdSt(RISCV64op_FLD, rreg, base, soff12); + return; + default: + ppHRegClass(rclass); + vpanic("genReload_RISCV64: unimplemented regclass"); + } +} + +RISCV64Instr* genMove_RISCV64(HReg from, HReg to, Bool mode64) +{ + vassert(mode64 == True); + + HRegClass rclass = hregClass(from); + switch (rclass) { + case HRcInt64: + return RISCV64Instr_MV(to, from); + case HRcFlt64: + return RISCV64Instr_FpMove(RISCV64op_FMV_D, to, from); + default: + ppHRegClass(rclass); + vpanic("genMove_RISCV64: unimplemented regclass"); + } +} + +/*------------------------------------------------------------*/ +/*--- Functions to emit a sequence of bytes ---*/ +/*------------------------------------------------------------*/ + +static inline 
UChar* emit16(UChar* p, UShort val) +{ + *p++ = (val >> 0) & 0xff; + *p++ = (val >> 8) & 0xff; + return p; +} + +static inline UChar* emit32(UChar* p, UInt val) +{ + *p++ = (val >> 0) & 0xff; + *p++ = (val >> 8) & 0xff; + *p++ = (val >> 16) & 0xff; + *p++ = (val >> 24) & 0xff; + return p; +} + +/*------------------------------------------------------------*/ +/*--- Functions to emit various instruction formats ---*/ +/*------------------------------------------------------------*/ + +/* Emit an R-type instruction. */ +static UChar* emit_R( + UChar* p, UInt opcode, UInt rd, UInt funct3, UInt rs1, UInt rs2, UInt funct7) +{ + vassert(opcode >> 7 == 0); + vassert(rd >> 5 == 0); + vassert(funct3 >> 3 == 0); + vassert(rs1 >> 5 == 0); + vassert(rs2 >> 5 == 0); + vassert(funct7 >> 7 == 0); + + UInt the_insn = 0; + + the_insn |= opcode << 0; + the_insn |= rd << 7; + the_insn |= funct3 << 12; + the_insn |= rs1 << 15; + the_insn |= rs2 << 20; + the_insn |= funct7 << 25; + + return emit32(p, the_insn); +} + +/* Emit an I-type instruction. */ +static UChar* +emit_I(UChar* p, UInt opcode, UInt rd, UInt funct3, UInt rs1, UInt imm11_0) +{ + vassert(opcode >> 7 == 0); + vassert(rd >> 5 == 0); + vassert(funct3 >> 3 == 0); + vassert(rs1 >> 5 == 0); + vassert(imm11_0 >> 12 == 0); + + UInt the_insn = 0; + + the_insn |= opcode << 0; + the_insn |= rd << 7; + the_insn |= funct3 << 12; + the_insn |= rs1 << 15; + the_insn |= imm11_0 << 20; + + return emit32(p, the_insn); +} + +/* Emit an S-type instruction. */ +static UChar* +emit_S(UChar* p, UInt opcode, UInt imm11_0, UInt funct3, UInt rs1, UInt rs2) +{ + vassert(opcode >> 7 == 0); + vassert(imm11_0 >> 12 == 0); + vassert(funct3 >> 3 == 0); + vassert(rs1 >> 5 == 0); + vassert(rs2 >> 5 == 0); + + UInt imm4_0 = (imm11_0 >> 0) & 0x1f; + UInt imm11_5 = (imm11_0 >> 5) & 0x7f; + + UInt the_insn = 0; + + the_insn |= opcode << 0; + the_insn |= imm4_0 << 7; + the_insn |= funct3 << 12; + the_insn |= rs1 << 15; + the_insn |= rs2 << 20; + the_insn |= imm11_5 << 25; + + return emit32(p, the_insn); +} + +/* Emit a B-type instruction. */ +static UChar* +emit_B(UChar* p, UInt opcode, UInt imm12_1, UInt funct3, UInt rs1, UInt rs2) +{ + vassert(opcode >> 7 == 0); + vassert(imm12_1 >> 12 == 0); + vassert(funct3 >> 3 == 0); + vassert(rs1 >> 5 == 0); + vassert(rs2 >> 5 == 0); + + UInt imm11_11 = (imm12_1 >> 10) & 0x1; + UInt imm4_1 = (imm12_1 >> 0) & 0xf; + UInt imm10_5 = (imm12_1 >> 4) & 0x3f; + UInt imm12_12 = (imm12_1 >> 11) & 0x1; + + UInt the_insn = 0; + + the_insn |= opcode << 0; + the_insn |= imm11_11 << 7; + the_insn |= imm4_1 << 8; + the_insn |= funct3 << 12; + the_insn |= rs1 << 15; + the_insn |= rs2 << 20; + the_insn |= imm10_5 << 25; + the_insn |= imm12_12 << 31; + + return emit32(p, the_insn); +} + +/* Emit a U-type instruction. */ +static UChar* emit_U(UChar* p, UInt opcode, UInt rd, UInt imm31_12) +{ + vassert(opcode >> 7 == 0); + vassert(rd >> 5 == 0); + vassert(imm31_12 >> 20 == 0); + + UInt the_insn = 0; + + the_insn |= opcode << 0; + the_insn |= rd << 7; + the_insn |= imm31_12 << 12; + + return emit32(p, the_insn); +} + +/* Emit a CR-type instruction. */ +static UChar* emit_CR(UChar* p, UInt opcode, UInt rs2, UInt rd, UInt funct4) +{ + vassert(opcode >> 2 == 0); + vassert(rs2 >> 5 == 0); + vassert(rd >> 5 == 0); + vassert(funct4 >> 4 == 0); + + UShort the_insn = 0; + + the_insn |= opcode << 0; + the_insn |= rs2 << 2; + the_insn |= rd << 7; + the_insn |= funct4 << 12; + + return emit16(p, the_insn); +} + +/* Emit a CI-type instruction. 
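+   The layout is funct3 in bits [15:13], imm[5] in bit 12, rd in bits [11:7],
+   imm[4:0] in bits [6:2] and the 2-bit opcode in bits [1:0]. For example,
+   emit_CI(p, 0b01, 1, 10, 0b010) encodes 'c.li a0, 1' as the half-word
+   0x4505.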
*/ +static UChar* emit_CI(UChar* p, UInt opcode, UInt imm5_0, UInt rd, UInt funct3) +{ + vassert(opcode >> 2 == 0); + vassert(imm5_0 >> 6 == 0); + vassert(rd >> 5 == 0); + vassert(funct3 >> 3 == 0); + + UInt imm4_0 = (imm5_0 >> 0) & 0x1f; + UInt imm5_5 = (imm5_0 >> 5) & 0x1; + + UShort the_insn = 0; + + the_insn |= opcode << 0; + the_insn |= imm4_0 << 2; + the_insn |= rd << 7; + the_insn |= imm5_5 << 12; + the_insn |= funct3 << 13; + + return emit16(p, the_insn); +} + +/* Emit a CJ-type instruction. */ +static UChar* emit_CJ(UChar* p, UInt opcode, UInt imm11_1, UInt funct3) +{ + vassert(opcode >> 2 == 0); + vassert(imm11_1 >> 11 == 0); + vassert(funct3 >> 3 == 0); + + UInt imm5_5 = (imm11_1 >> 4) & 0x1; + UInt imm3_1 = (imm11_1 >> 0) & 0x7; + UInt imm7_7 = (imm11_1 >> 6) & 0x1; + UInt imm6_6 = (imm11_1 >> 5) & 0x1; + UInt imm10_10 = (imm11_1 >> 9) & 0x1; + UInt imm9_8 = (imm11_1 >> 7) & 0x3; + UInt imm4_4 = (imm11_1 >> 3) & 0x1; + UInt imm11_11 = (imm11_1 >> 10) & 0x1; + + UShort the_insn = 0; + + the_insn |= opcode << 0; + the_insn |= imm5_5 << 2; + the_insn |= imm3_1 << 3; + the_insn |= imm7_7 << 6; + the_insn |= imm6_6 << 7; + the_insn |= imm10_10 << 8; + the_insn |= imm9_8 << 9; + the_insn |= imm4_4 << 11; + the_insn |= imm11_11 << 12; + the_insn |= funct3 << 13; + + return emit16(p, the_insn); +} + +/*------------------------------------------------------------*/ +/*--- Code generation ---*/ +/*------------------------------------------------------------*/ + +/* Get an immediate into a register, using only that register. */ +static UChar* imm64_to_ireg(UChar* p, UInt dst, ULong imm64) +{ + vassert(dst > 0 && dst <= 31); + + Long simm64 = imm64; + + if (simm64 >= -32 && simm64 <= 31) { + /* c.li dst, simm64[5:0] */ + return emit_CI(p, 0b01, imm64 & 0x3f, dst, 0b010); + } + + /* TODO Add implementation with addi only and c.lui+addi. */ + + if (simm64 >= -2147483648 && simm64 <= 2147483647) { + /* lui dst, simm64[31:12]+simm64[11] */ + p = emit_U(p, 0b0110111, dst, ((imm64 + 0x800) >> 12) & 0xfffff); + if ((imm64 & 0xfff) == 0) + return p; + /* addiw dst, dst, simm64[11:0] */ + return emit_I(p, 0b0011011, dst, 0b000, dst, imm64 & 0xfff); + } + + /* Handle a constant that is out of the 32-bit signed integer range. */ + /* Strip the low 12 bits. */ + ULong imm11_0 = imm64 & 0xfff; + + /* Get the remaining adjusted upper bits. */ + ULong rem = (simm64 + 0x800) >> 12; + UInt sham6 = 12 + __builtin_ctzll(rem); + vassert(sham6 < 64); + rem = vex_sx_to_64(rem >> (sham6 - 12), 64 - sham6); + + /* Generate instructions to load the upper bits. */ + p = imm64_to_ireg(p, dst, rem); + /* c.slli dst, sham6 */ + p = emit_CI(p, 0b10, sham6, dst, 0b000); + + /* Add the low bits in. */ + if (imm11_0 == 0) + return p; + UInt imm5_0 = imm11_0 & 0x3f; + if (vex_sx_to_64(imm5_0, 6) == vex_sx_to_64(imm11_0, 12)) { + /* c.addi dst, imm5_0 */ + p = emit_CI(p, 0b01, imm5_0, dst, 0b000); + } else { + /* addi dst, dst, imm11_0 */ + p = emit_I(p, 0b0010011, dst, 0b000, dst, imm11_0); + } + + return p; +} + +/* Get a 48-bit address into a register, using only that register, and + generating a constant number of instructions with 18 bytes in size, + regardless of the value of the address. This is used when generating + sections of code that need to be patched later, so as to guarantee a + specific size. + + Notice that this function is designed to support target systems that use the + Sv39 or Sv48 virtual-memory system. 
The input address is checked to be in + the Sv48 format, that is, bits [63:48] must all be equal to bit 47. + Exploiting the fact that the address is only 48 bits in size saves 2 + instructions compared to materializing a full 64-bit address. + + TODO Review if generating instead 'c.ld dst, 1f; c.j 2f; .align 3; + 1: .quad imm; 2:' is possible and would be better. + */ +static UChar* addr48_to_ireg_EXACTLY_18B(UChar* p, UInt dst, ULong imm48) +{ + vassert(imm48 >> 47 == 0 || imm48 >> 47 == 0x1ffff); + + ULong rem = imm48; + ULong imm47_28, imm27_16, imm15_4, imm3_0; + imm3_0 = rem & 0xf; + rem = (rem + 0x8) >> 4; + imm15_4 = rem & 0xfff; + rem = (rem + 0x800) >> 12; + imm27_16 = rem & 0xfff; + rem = (rem + 0x800) >> 12; + imm47_28 = rem & 0xfffff; + + /* lui dst, imm47_28 */ + p = emit_U(p, 0b0110111, dst, imm47_28); + /* addiw dst, dst, imm27_16 */ + p = emit_I(p, 0b0011011, dst, 0b000, dst, imm27_16); + /* c.slli dst, 12 */ + p = emit_CI(p, 0b10, 12, dst, 0b000); + /* addi dst, dst, imm15_4 */ + p = emit_I(p, 0b0010011, dst, 0b000, dst, imm15_4); + /* c.slli dst, 4 */ + p = emit_CI(p, 0b10, 4, dst, 0b000); + if (imm3_0 != 0) { + /* c.addi dst, imm3_0 */ + p = emit_CI(p, 0b01, vex_sx_to_64(imm3_0, 4) & 0x3f, dst, 0b000); + } else { + /* c.nop */ + p = emit_CI(p, 0b01, 0, 0, 0b000); + } + + return p; +} + +/* Check whether p points at an instruction sequence cooked up by + addr48_to_ireg_EXACTLY_18B(). */ +static Bool is_addr48_to_ireg_EXACTLY_18B(UChar* p, UInt dst, ULong imm48) +{ + UChar tmp[18]; + UChar* q; + + q = addr48_to_ireg_EXACTLY_18B(&tmp[0], dst, imm48); + if (q - &tmp[0] != 18) + return False; + + q = &tmp[0]; + for (UInt i = 0; i < 18; i++) { + if (*p != *q) + return False; + p++; + q++; + } + return True; +} + +/* Emit an instruction into buf and return the number of bytes used. Note that + buf is not the insn's final place, and therefore it is imperative to emit + position-independent code. If the emitted instruction was a profiler inc, set + *is_profInc to True, else leave it unchanged.
*/ +Int emit_RISCV64Instr(/*MB_MOD*/ Bool* is_profInc, + UChar* buf, + Int nbuf, + const RISCV64Instr* i, + Bool mode64, + VexEndness endness_host, + const void* disp_cp_chain_me_to_slowEP, + const void* disp_cp_chain_me_to_fastEP, + const void* disp_cp_xindir, + const void* disp_cp_xassisted) +{ + vassert(nbuf >= 32); + vassert(mode64 == True); + vassert(((HWord)buf & 1) == 0); + + UChar* p = &buf[0]; + + switch (i->tag) { + case RISCV64in_LI: + p = imm64_to_ireg(p, iregEnc(i->RISCV64in.LI.dst), i->RISCV64in.LI.imm64); + goto done; + case RISCV64in_MV: { + /* c.mv dst, src */ + UInt dst = iregEnc(i->RISCV64in.MV.dst); + UInt src = iregEnc(i->RISCV64in.MV.src); + + p = emit_CR(p, 0b10, src, dst, 0b1000); + goto done; + } + case RISCV64in_ALU: { + /* dst, src1, src2 */ + UInt dst = iregEnc(i->RISCV64in.ALU.dst); + UInt src1 = iregEnc(i->RISCV64in.ALU.src1); + UInt src2 = iregEnc(i->RISCV64in.ALU.src2); + switch (i->RISCV64in.ALU.op) { + case RISCV64op_ADD: + p = emit_R(p, 0b0110011, dst, 0b000, src1, src2, 0b0000000); + goto done; + case RISCV64op_SUB: + p = emit_R(p, 0b0110011, dst, 0b000, src1, src2, 0b0100000); + goto done; + case RISCV64op_ADDW: + p = emit_R(p, 0b0111011, dst, 0b000, src1, src2, 0b0000000); + goto done; + case RISCV64op_SUBW: + p = emit_R(p, 0b0111011, dst, 0b000, src1, src2, 0b0100000); + goto done; + case RISCV64op_XOR: + p = emit_R(p, 0b0110011, dst, 0b100, src1, src2, 0b0000000); + goto done; + case RISCV64op_OR: + p = emit_R(p, 0b0110011, dst, 0b110, src1, src2, 0b0000000); + goto done; + case RISCV64op_AND: + p = emit_R(p, 0b0110011, dst, 0b111, src1, src2, 0b0000000); + goto done; + case RISCV64op_SLL: + p = emit_R(p, 0b0110011, dst, 0b001, src1, src2, 0b0000000); + goto done; + case RISCV64op_SRL: + p = emit_R(p, 0b0110011, dst, 0b101, src1, src2, 0b0000000); + goto done; + case RISCV64op_SRA: + p = emit_R(p, 0b0110011, dst, 0b101, src1, src2, 0b0100000); + goto done; + case RISCV64op_SLLW: + p = emit_R(p, 0b0111011, dst, 0b001, src1, src2, 0b0000000); + goto done; + case RISCV64op_SRLW: + p = emit_R(p, 0b0111011, dst, 0b101, src1, src2, 0b0000000); + goto done; + case RISCV64op_SRAW: + p = emit_R(p, 0b0111011, dst, 0b101, src1, src2, 0b0100000); + goto done; + case RISCV64op_SLT: + p = emit_R(p, 0b0110011, dst, 0b010, src1, src2, 0b0000000); + goto done; + case RISCV64op_SLTU: + p = emit_R(p, 0b0110011, dst, 0b011, src1, src2, 0b0000000); + goto done; + case RISCV64op_MUL: + p = emit_R(p, 0b0110011, dst, 0b000, src1, src2, 0b0000001); + goto done; + case RISCV64op_MULH: + p = emit_R(p, 0b0110011, dst, 0b001, src1, src2, 0b0000001); + goto done; + case RISCV64op_MULHU: + p = emit_R(p, 0b0110011, dst, 0b011, src1, src2, 0b0000001); + goto done; + case RISCV64op_DIV: + p = emit_R(p, 0b0110011, dst, 0b100, src1, src2, 0b0000001); + goto done; + case RISCV64op_DIVU: + p = emit_R(p, 0b0110011, dst, 0b101, src1, src2, 0b0000001); + goto done; + case RISCV64op_REM: + p = emit_R(p, 0b0110011, dst, 0b110, src1, src2, 0b0000001); + goto done; + case RISCV64op_REMU: + p = emit_R(p, 0b0110011, dst, 0b111, src1, src2, 0b0000001); + goto done; + case RISCV64op_MULW: + p = emit_R(p, 0b0111011, dst, 0b000, src1, src2, 0b0000001); + goto done; + case RISCV64op_DIVW: + p = emit_R(p, 0b0111011, dst, 0b100, src1, src2, 0b0000001); + goto done; + case RISCV64op_DIVUW: + p = emit_R(p, 0b0111011, dst, 0b101, src1, src2, 0b0000001); + goto done; + case RISCV64op_REMW: + p = emit_R(p, 0b0111011, dst, 0b110, src1, src2, 0b0000001); + goto done; + case RISCV64op_REMUW: + p = emit_R(p, 
0b0111011, dst, 0b111, src1, src2, 0b0000001); + goto done; + } + break; + } + case RISCV64in_ALUImm: { + /* dst, src, imm12 */ + UInt dst = iregEnc(i->RISCV64in.ALUImm.dst); + UInt src = iregEnc(i->RISCV64in.ALUImm.src); + Int imm12 = i->RISCV64in.ALUImm.imm12; + switch (i->RISCV64in.ALUImm.op) { + case RISCV64op_ADDI: + vassert(imm12 >= -2048 && imm12 < 2048); + p = emit_I(p, 0b0010011, dst, 0b000, src, imm12 & 0xfff); + goto done; + case RISCV64op_ADDIW: + vassert(imm12 >= -2048 && imm12 < 2048); + p = emit_I(p, 0b0011011, dst, 0b000, src, imm12 & 0xfff); + goto done; + case RISCV64op_XORI: + vassert(imm12 >= -2048 && imm12 < 2048); + p = emit_I(p, 0b0010011, dst, 0b100, src, imm12 & 0xfff); + goto done; + case RISCV64op_ANDI: + vassert(imm12 >= -2048 && imm12 < 2048); + p = emit_I(p, 0b0010011, dst, 0b111, src, imm12 & 0xfff); + goto done; + case RISCV64op_SLLI: + vassert(imm12 >= 0 && imm12 < 64); + p = emit_I(p, 0b0010011, dst, 0b001, src, (0b000000 << 6) | imm12); + goto done; + case RISCV64op_SRLI: + vassert(imm12 >= 0 && imm12 < 64); + p = emit_I(p, 0b0010011, dst, 0b101, src, (0b000000 << 6) | imm12); + goto done; + case RISCV64op_SRAI: + vassert(imm12 >= 0 && imm12 < 64); + p = emit_I(p, 0b0010011, dst, 0b101, src, (0b010000 << 6) | imm12); + goto done; + case RISCV64op_SLTIU: + vassert(imm12 >= -2048 && imm12 < 2048); + p = emit_I(p, 0b0010011, dst, 0b011, src, imm12 & 0xfff); + goto done; + } + break; + } + case RISCV64in_Load: { + /* l dst, soff12(base) */ + UInt dst = iregEnc(i->RISCV64in.Load.dst); + UInt base = iregEnc(i->RISCV64in.Load.base); + Int soff12 = i->RISCV64in.Load.soff12; + vassert(soff12 >= -2048 && soff12 < 2048); + UInt imm11_0 = soff12 & 0xfff; + switch (i->RISCV64in.Load.op) { + case RISCV64op_LD: + p = emit_I(p, 0b0000011, dst, 0b011, base, imm11_0); + goto done; + case RISCV64op_LW: + p = emit_I(p, 0b0000011, dst, 0b010, base, imm11_0); + goto done; + case RISCV64op_LH: + p = emit_I(p, 0b0000011, dst, 0b001, base, imm11_0); + goto done; + case RISCV64op_LB: + p = emit_I(p, 0b0000011, dst, 0b000, base, imm11_0); + goto done; + } + break; + } + case RISCV64in_Store: { + /* s src, soff12(base) */ + UInt src = iregEnc(i->RISCV64in.Store.src); + UInt base = iregEnc(i->RISCV64in.Store.base); + Int soff12 = i->RISCV64in.Store.soff12; + vassert(soff12 >= -2048 && soff12 < 2048); + UInt imm11_0 = soff12 & 0xfff; + switch (i->RISCV64in.Store.op) { + case RISCV64op_SD: + p = emit_S(p, 0b0100011, imm11_0, 0b011, base, src); + goto done; + case RISCV64op_SW: + p = emit_S(p, 0b0100011, imm11_0, 0b010, base, src); + goto done; + case RISCV64op_SH: + p = emit_S(p, 0b0100011, imm11_0, 0b001, base, src); + goto done; + case RISCV64op_SB: + p = emit_S(p, 0b0100011, imm11_0, 0b000, base, src); + goto done; + } + goto done; + } + case RISCV64in_LoadR: { + /* lr. dst, (addr) */ + UInt dst = iregEnc(i->RISCV64in.LoadR.dst); + UInt addr = iregEnc(i->RISCV64in.LoadR.addr); + switch (i->RISCV64in.LoadR.op) { + case RISCV64op_LR_W: + p = emit_R(p, 0b0101111, dst, 0b010, addr, 0b00000, 0b0001000); + goto done; + } + break; + } + case RISCV64in_StoreC: { + /* sc. 
res, dst, (addr) */ + UInt res = iregEnc(i->RISCV64in.StoreC.res); + UInt src = iregEnc(i->RISCV64in.StoreC.src); + UInt addr = iregEnc(i->RISCV64in.StoreC.addr); + switch (i->RISCV64in.StoreC.op) { + case RISCV64op_SC_W: + p = emit_R(p, 0b0101111, res, 0b010, addr, src, 0b0001100); + goto done; + } + break; + } + case RISCV64in_CSRRW: { + /* csrrw dst, csr, src */ + UInt dst = iregEnc(i->RISCV64in.CSRRW.dst); + UInt src = iregEnc(i->RISCV64in.CSRRW.src); + UInt csr = i->RISCV64in.CSRRW.csr; + vassert(csr < 4096); + + p = emit_I(p, 0b1110011, dst, 0b001, src, csr); + goto done; + } + case RISCV64in_FpUnary: { + /* f dst, src */ + UInt dst = fregEnc(i->RISCV64in.FpUnary.dst); + UInt src = fregEnc(i->RISCV64in.FpUnary.src); + switch (i->RISCV64in.FpUnary.op) { + case RISCV64op_FSQRT_S: + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00000, 0b0101100); + goto done; + case RISCV64op_FSQRT_D: + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00000, 0b0101101); + goto done; + } + break; + } + case RISCV64in_FpBinary: { + /* f dst, src1, src2 */ + UInt dst = fregEnc(i->RISCV64in.FpBinary.dst); + UInt src1 = fregEnc(i->RISCV64in.FpBinary.src1); + UInt src2 = fregEnc(i->RISCV64in.FpBinary.src2); + switch (i->RISCV64in.FpBinary.op) { + case RISCV64op_FADD_S: + p = emit_R(p, 0b1010011, dst, 0b111, src1, src2, 0b0000000); + goto done; + case RISCV64op_FMUL_S: + p = emit_R(p, 0b1010011, dst, 0b111, src1, src2, 0b0001000); + goto done; + case RISCV64op_FDIV_S: + p = emit_R(p, 0b1010011, dst, 0b111, src1, src2, 0b0001100); + goto done; + case RISCV64op_FSGNJN_S: + p = emit_R(p, 0b1010011, dst, 0b001, src1, src2, 0b0010000); + goto done; + case RISCV64op_FSGNJX_S: + p = emit_R(p, 0b1010011, dst, 0b010, src1, src2, 0b0010000); + goto done; + case RISCV64op_FMIN_S: + p = emit_R(p, 0b1010011, dst, 0b000, src1, src2, 0b0010100); + goto done; + case RISCV64op_FMAX_S: + p = emit_R(p, 0b1010011, dst, 0b001, src1, src2, 0b0010100); + goto done; + case RISCV64op_FADD_D: + p = emit_R(p, 0b1010011, dst, 0b111, src1, src2, 0b0000001); + goto done; + case RISCV64op_FSUB_D: + p = emit_R(p, 0b1010011, dst, 0b111, src1, src2, 0b0000101); + goto done; + case RISCV64op_FMUL_D: + p = emit_R(p, 0b1010011, dst, 0b111, src1, src2, 0b0001001); + goto done; + case RISCV64op_FDIV_D: + p = emit_R(p, 0b1010011, dst, 0b111, src1, src2, 0b0001101); + goto done; + case RISCV64op_FSGNJN_D: + p = emit_R(p, 0b1010011, dst, 0b001, src1, src2, 0b0010001); + goto done; + case RISCV64op_FSGNJX_D: + p = emit_R(p, 0b1010011, dst, 0b010, src1, src2, 0b0010001); + goto done; + case RISCV64op_FMIN_D: + p = emit_R(p, 0b1010011, dst, 0b000, src1, src2, 0b0010101); + goto done; + case RISCV64op_FMAX_D: + p = emit_R(p, 0b1010011, dst, 0b001, src1, src2, 0b0010101); + goto done; + } + break; + } + case RISCV64in_FpTernary: { + /* f dst, src1, src2, src3 */ + UInt dst = fregEnc(i->RISCV64in.FpTernary.dst); + UInt src1 = fregEnc(i->RISCV64in.FpTernary.src1); + UInt src2 = fregEnc(i->RISCV64in.FpTernary.src2); + UInt src3 = fregEnc(i->RISCV64in.FpTernary.src3); + switch (i->RISCV64in.FpTernary.op) { + case RISCV64op_FMADD_S: + p = emit_R(p, 0b1000011, dst, 0b111, src1, src2, src3 << 2 | 0b00); + goto done; + case RISCV64op_FMADD_D: + p = emit_R(p, 0b1000011, dst, 0b111, src1, src2, src3 << 2 | 0b01); + goto done; + } + break; + } + case RISCV64in_FpMove: { + /* f dst, src */ + UInt dst, src; + switch (i->RISCV64in.FpMove.op) { + case RISCV64op_FMV_X_W: + dst = iregEnc(i->RISCV64in.FpMove.dst); + src = fregEnc(i->RISCV64in.FpMove.src); + p = emit_R(p, 
0b1010011, dst, 0b000, src, 0b00000, 0b1110000); + goto done; + case RISCV64op_FMV_W_X: + dst = fregEnc(i->RISCV64in.FpMove.dst); + src = iregEnc(i->RISCV64in.FpMove.src); + p = emit_R(p, 0b1010011, dst, 0b000, src, 0b00000, 0b1111000); + goto done; + case RISCV64op_FMV_D: + dst = fregEnc(i->RISCV64in.FpMove.dst); + src = fregEnc(i->RISCV64in.FpMove.src); + p = emit_R(p, 0b1010011, dst, 0b000, src, src, 0b0010001); + goto done; + case RISCV64op_FMV_X_D: + dst = iregEnc(i->RISCV64in.FpMove.dst); + src = fregEnc(i->RISCV64in.FpMove.src); + p = emit_R(p, 0b1010011, dst, 0b000, src, 0b00000, 0b1110001); + goto done; + case RISCV64op_FMV_D_X: + dst = fregEnc(i->RISCV64in.FpMove.dst); + src = iregEnc(i->RISCV64in.FpMove.src); + p = emit_R(p, 0b1010011, dst, 0b000, src, 0b00000, 0b1111001); + goto done; + } + break; + } + case RISCV64in_FpConvert: { + /* f dst, src */ + UInt dst, src; + switch (i->RISCV64in.FpConvert.op) { + case RISCV64op_FCVT_W_S: + dst = iregEnc(i->RISCV64in.FpConvert.dst); + src = fregEnc(i->RISCV64in.FpConvert.src); + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00000, 0b1100000); + goto done; + case RISCV64op_FCVT_WU_S: + dst = iregEnc(i->RISCV64in.FpConvert.dst); + src = fregEnc(i->RISCV64in.FpConvert.src); + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00001, 0b1100000); + goto done; + case RISCV64op_FCVT_S_W: + dst = fregEnc(i->RISCV64in.FpConvert.dst); + src = iregEnc(i->RISCV64in.FpConvert.src); + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00000, 0b1101000); + goto done; + case RISCV64op_FCVT_S_WU: + dst = fregEnc(i->RISCV64in.FpConvert.dst); + src = iregEnc(i->RISCV64in.FpConvert.src); + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00001, 0b1101000); + goto done; + case RISCV64op_FCVT_L_S: + dst = iregEnc(i->RISCV64in.FpConvert.dst); + src = fregEnc(i->RISCV64in.FpConvert.src); + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00010, 0b1100000); + goto done; + case RISCV64op_FCVT_LU_S: + dst = iregEnc(i->RISCV64in.FpConvert.dst); + src = fregEnc(i->RISCV64in.FpConvert.src); + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00011, 0b1100000); + goto done; + case RISCV64op_FCVT_S_L: + dst = fregEnc(i->RISCV64in.FpConvert.dst); + src = iregEnc(i->RISCV64in.FpConvert.src); + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00010, 0b1101000); + goto done; + case RISCV64op_FCVT_S_LU: + dst = fregEnc(i->RISCV64in.FpConvert.dst); + src = iregEnc(i->RISCV64in.FpConvert.src); + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00011, 0b1101000); + goto done; + case RISCV64op_FCVT_S_D: + dst = fregEnc(i->RISCV64in.FpConvert.dst); + src = fregEnc(i->RISCV64in.FpConvert.src); + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00001, 0b0100000); + goto done; + case RISCV64op_FCVT_D_S: + dst = fregEnc(i->RISCV64in.FpConvert.dst); + src = fregEnc(i->RISCV64in.FpConvert.src); + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00000, 0b0100001); + goto done; + case RISCV64op_FCVT_W_D: + dst = iregEnc(i->RISCV64in.FpConvert.dst); + src = fregEnc(i->RISCV64in.FpConvert.src); + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00000, 0b1100001); + goto done; + case RISCV64op_FCVT_WU_D: + dst = iregEnc(i->RISCV64in.FpConvert.dst); + src = fregEnc(i->RISCV64in.FpConvert.src); + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00001, 0b1100001); + goto done; + case RISCV64op_FCVT_D_W: + dst = fregEnc(i->RISCV64in.FpConvert.dst); + src = iregEnc(i->RISCV64in.FpConvert.src); + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00000, 0b1101001); + goto done; + case RISCV64op_FCVT_D_WU: + dst = fregEnc(i->RISCV64in.FpConvert.dst); + src = 
iregEnc(i->RISCV64in.FpConvert.src); + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00001, 0b1101001); + goto done; + case RISCV64op_FCVT_L_D: + dst = iregEnc(i->RISCV64in.FpConvert.dst); + src = fregEnc(i->RISCV64in.FpConvert.src); + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00010, 0b1100001); + goto done; + case RISCV64op_FCVT_LU_D: + dst = iregEnc(i->RISCV64in.FpConvert.dst); + src = fregEnc(i->RISCV64in.FpConvert.src); + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00011, 0b1100001); + goto done; + case RISCV64op_FCVT_D_L: + dst = fregEnc(i->RISCV64in.FpConvert.dst); + src = iregEnc(i->RISCV64in.FpConvert.src); + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00010, 0b1101001); + goto done; + case RISCV64op_FCVT_D_LU: + dst = fregEnc(i->RISCV64in.FpConvert.dst); + src = iregEnc(i->RISCV64in.FpConvert.src); + p = emit_R(p, 0b1010011, dst, 0b111, src, 0b00011, 0b1101001); + goto done; + } + break; + } + case RISCV64in_FpCompare: { + /* f dst, src1, src2 */ + UInt dst = iregEnc(i->RISCV64in.FpCompare.dst); + UInt src1 = fregEnc(i->RISCV64in.FpCompare.src1); + UInt src2 = fregEnc(i->RISCV64in.FpCompare.src2); + switch (i->RISCV64in.FpCompare.op) { + case RISCV64op_FEQ_S: + p = emit_R(p, 0b1010011, dst, 0b010, src1, src2, 0b1010000); + goto done; + case RISCV64op_FLT_S: + p = emit_R(p, 0b1010011, dst, 0b001, src1, src2, 0b1010000); + goto done; + case RISCV64op_FEQ_D: + p = emit_R(p, 0b1010011, dst, 0b010, src1, src2, 0b1010001); + goto done; + case RISCV64op_FLT_D: + p = emit_R(p, 0b1010011, dst, 0b001, src1, src2, 0b1010001); + goto done; + } + break; + } + case RISCV64in_FpLdSt: { + /* f reg, soff12(base) */ + UInt reg = fregEnc(i->RISCV64in.FpLdSt.reg); + UInt base = iregEnc(i->RISCV64in.FpLdSt.base); + Int soff12 = i->RISCV64in.FpLdSt.soff12; + vassert(soff12 >= -2048 && soff12 < 2048); + UInt imm11_0 = soff12 & 0xfff; + switch (i->RISCV64in.FpLdSt.op) { + case RISCV64op_FLW: + p = emit_I(p, 0b0000111, reg /*dst*/, 0b010, base, imm11_0); + goto done; + case RISCV64op_FLD: + p = emit_I(p, 0b0000111, reg /*dst*/, 0b011, base, imm11_0); + goto done; + case RISCV64op_FSW: + p = emit_S(p, 0b0100111, imm11_0, 0b010, base, reg /*src*/); + goto done; + case RISCV64op_FSD: + p = emit_S(p, 0b0100111, imm11_0, 0b011, base, reg /*src*/); + goto done; + } + break; + } + case RISCV64in_CAS: { + /* 1: lr. old, (addr) + bne old, expd, 2f + sc. 
t0, data, (addr) + bne t0, zero, 1b + 2: + */ + UInt old = iregEnc(i->RISCV64in.CAS.old); + UInt addr = iregEnc(i->RISCV64in.CAS.addr); + UInt expd = iregEnc(i->RISCV64in.CAS.expd); + UInt data = iregEnc(i->RISCV64in.CAS.data); + switch (i->RISCV64in.CAS.op) { + case RISCV64op_CAS_D: + p = emit_R(p, 0b0101111, old, 0b011, addr, 0b00000, 0b0001000); + p = emit_B(p, 0b1100011, (12 >> 1) & 0xfff, 0b001, old, expd); + p = emit_R(p, 0b0101111, 5 /*x5/t0*/, 0b011, addr, data, 0b0001100); + p = emit_B(p, 0b1100011, (-12 >> 1) & 0xfff, 0b001, 5 /*x5/t0*/, + 0 /*x0/zero*/); + goto done; + case RISCV64op_CAS_W: + p = emit_R(p, 0b0101111, old, 0b010, addr, 0b00000, 0b0001000); + p = emit_B(p, 0b1100011, (12 >> 1) & 0xfff, 0b001, old, expd); + p = emit_R(p, 0b0101111, 5 /*x5/t0*/, 0b010, addr, data, 0b0001100); + p = emit_B(p, 0b1100011, (-12 >> 1) & 0xfff, 0b001, 5 /*x5/t0*/, + 0 /*x0/zero*/); + goto done; + } + break; + } + case RISCV64in_FENCE: { + /* fence */ + p = emit_I(p, 0b0001111, 0b00000, 0b000, 0b00000, 0b000011111111); + goto done; + } + case RISCV64in_CSEL: { + /* beq cond, zero, 1f + c.mv dst, iftrue + c.j 2f + 1: c.mv dst, iffalse + 2: + */ + UInt dst = iregEnc(i->RISCV64in.CSEL.dst); + UInt iftrue = iregEnc(i->RISCV64in.CSEL.iftrue); + UInt iffalse = iregEnc(i->RISCV64in.CSEL.iffalse); + UInt cond = iregEnc(i->RISCV64in.CSEL.cond); + + p = emit_B(p, 0b1100011, (8 >> 1) & 0xfff, 0b000, cond, 0 /*x0/zero*/); + p = emit_CR(p, 0b10, iftrue, dst, 0b1000); + p = emit_CJ(p, 0b01, (4 >> 1) & 0x7ff, 0b101); + p = emit_CR(p, 0b10, iffalse, dst, 0b1000); + goto done; + } + case RISCV64in_Call: { + /* beq cond, zero, 1f + li t0, target + c.jalr 0(t0) + 1: + */ + UChar* ptmp = NULL; + if (!hregIsInvalid(i->RISCV64in.Call.cond)) { + ptmp = p; + p += 4; + } + + /* li t0, target */ + p = imm64_to_ireg(p, 5 /*x5/t0*/, i->RISCV64in.Call.target); + + /* c.jalr 0(t0) */ + p = emit_CR(p, 0b10, 0 /*x0/zero*/, 5 /*x5/t0*/, 0b1001); + + /* Fix up the conditional jump, if there was one. */ + if (!hregIsInvalid(i->RISCV64in.Call.cond)) { + /* beq cond, zero, delta */ + UInt cond = iregEnc(i->RISCV64in.Call.cond); + UInt delta = p - ptmp; + /* delta_min = 4 (beq) + 2 (c.li) + 2 (c.jalr) = 8 */ + vassert(delta >= 8 && delta < 4096 && (delta & 1) == 0); + UInt imm12_1 = (delta >> 1) & 0xfff; + + emit_B(ptmp, 0b1100011, imm12_1, 0b000, cond, 0 /*x0/zero*/); + } + + goto done; + } + + case RISCV64in_XDirect: { + /* NB: what goes on here has to be very closely coordinated with the + chainXDirect_RISCV64() and unchainXDirect_RISCV64() below. */ + /* We're generating chain-me requests here, so we need to be sure this is + actually allowed -- no-redir translations can't use chain-me's. + Hence: */ + vassert(disp_cp_chain_me_to_slowEP != NULL); + vassert(disp_cp_chain_me_to_fastEP != NULL); + + /* First off, if this is conditional, create a conditional jump over the + rest of it. Or at least, leave a space for it that we will shortly fill + in. */ + UChar* ptmp = NULL; + if (!hregIsInvalid(i->RISCV64in.XDirect.cond)) { + ptmp = p; + p += 4; + } + + /* Update the guest pc. 
*/ + { + /* li t0, dstGA */ + p = imm64_to_ireg(p, 5 /*x5/t0*/, i->RISCV64in.XDirect.dstGA); + + /* sd t0, soff12(base) */ + UInt base = iregEnc(i->RISCV64in.XDirect.base); + Int soff12 = i->RISCV64in.XDirect.soff12; + vassert(soff12 >= -2048 && soff12 < 2048); + UInt imm11_0 = soff12 & 0xfff; + + p = emit_S(p, 0b0100011, imm11_0, 0b011, base, 5 /*x5/t0*/); + } + + /* --- FIRST PATCHABLE BYTE follows --- */ + /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling to) backs + up the return address, so as to find the address of the first patchable + byte. So: don't change the number of instructions (3) below. */ + /* li t0, VG_(disp_cp_chain_me_to_{slowEP,fastEP}) */ + const void* disp_cp_chain_me = i->RISCV64in.XDirect.toFastEP + ? disp_cp_chain_me_to_fastEP + : disp_cp_chain_me_to_slowEP; + + p = addr48_to_ireg_EXACTLY_18B(p, 5 /*x5/t0*/, (ULong)disp_cp_chain_me); + + /* c.jalr 0(t0) */ + p = emit_CR(p, 0b10, 0 /*x0/zero*/, 5 /*x5/t0*/, 0b1001); + /* --- END of PATCHABLE BYTES --- */ + + /* Fix up the conditional jump, if there was one. */ + if (!hregIsInvalid(i->RISCV64in.XDirect.cond)) { + /* beq cond, zero, delta */ + UInt cond = iregEnc(i->RISCV64in.XDirect.cond); + UInt delta = p - ptmp; + /* delta_min = 4 (beq) + 2 (c.li) + 4 (sd) + 18 (addr48) + 2 (c.jalr) + = 30 */ + vassert(delta >= 30 && delta < 4096 && (delta & 1) == 0); + UInt imm12_1 = (delta >> 1) & 0xfff; + + emit_B(ptmp, 0b1100011, imm12_1, 0b000, cond, 0 /*x0/zero*/); + } + + goto done; + } + + case RISCV64in_XIndir: { + /* We're generating transfers that could lead indirectly to a chain-me, so + we need to be sure this is actually allowed -- no-redir translations + are not allowed to reach normal translations without going through the + scheduler. That means no XDirects or XIndirs out from no-redir + translations. Hence: */ + vassert(disp_cp_xindir != NULL); + + /* First off, if this is conditional, create a conditional jump over the + rest of it. Or at least, leave a space for it that we will shortly fill + in. */ + UChar* ptmp = NULL; + if (!hregIsInvalid(i->RISCV64in.XIndir.cond)) { + ptmp = p; + p += 4; + } + + /* Update the guest pc. */ + { + /* sd r-dstGA, soff12(base) */ + UInt src = iregEnc(i->RISCV64in.XIndir.dstGA); + UInt base = iregEnc(i->RISCV64in.XIndir.base); + Int soff12 = i->RISCV64in.XIndir.soff12; + vassert(soff12 >= -2048 && soff12 < 2048); + UInt imm11_0 = soff12 & 0xfff; + + p = emit_S(p, 0b0100011, imm11_0, 0b011, base, src); + } + + /* li t0, VG_(disp_cp_xindir) */ + p = imm64_to_ireg(p, 5 /*x5/t0*/, (ULong)disp_cp_xindir); + + /* c.jr 0(t0) */ + p = emit_CR(p, 0b10, 0 /*x0/zero*/, 5 /*x5/t0*/, 0b1000); + + /* Fix up the conditional jump, if there was one. */ + if (!hregIsInvalid(i->RISCV64in.XIndir.cond)) { + /* beq cond, zero, delta */ + UInt cond = iregEnc(i->RISCV64in.XIndir.cond); + UInt delta = p - ptmp; + /* delta_min = 4 (beq) + 4 (sd) + 2 (c.li) + 2 (c.jr) = 12 */ + vassert(delta >= 12 && delta < 4096 && (delta & 1) == 0); + UInt imm12_1 = (delta >> 1) & 0xfff; + + emit_B(ptmp, 0b1100011, imm12_1, 0b000, cond, 0 /*x0/zero*/); + } + + goto done; + } + + case RISCV64in_XAssisted: { + /* First off, if this is conditional, create a conditional jump over the + rest of it. Or at least, leave a space for it that we will shortly fill + in. */ + UChar* ptmp = NULL; + if (!hregIsInvalid(i->RISCV64in.XAssisted.cond)) { + ptmp = p; + p += 4; + } + + /* Update the guest pc. 
*/ + { + /* sd r-dstGA, soff12(base) */ + UInt src = iregEnc(i->RISCV64in.XAssisted.dstGA); + UInt base = iregEnc(i->RISCV64in.XAssisted.base); + Int soff12 = i->RISCV64in.XAssisted.soff12; + vassert(soff12 >= -2048 && soff12 < 2048); + UInt imm11_0 = soff12 & 0xfff; + + p = emit_S(p, 0b0100011, imm11_0, 0b011, base, src); + } + + /* li s0, $magic_number */ + UInt trcval = 0; + switch (i->RISCV64in.XAssisted.jk) { + case Ijk_ClientReq: + trcval = VEX_TRC_JMP_CLIENTREQ; + break; + case Ijk_Sys_syscall: + trcval = VEX_TRC_JMP_SYS_SYSCALL; + break; + case Ijk_NoDecode: + trcval = VEX_TRC_JMP_NODECODE; + break; + case Ijk_InvalICache: + trcval = VEX_TRC_JMP_INVALICACHE; + break; + case Ijk_NoRedir: + trcval = VEX_TRC_JMP_NOREDIR; + break; + case Ijk_SigTRAP: + trcval = VEX_TRC_JMP_SIGTRAP; + break; + case Ijk_Boring: + trcval = VEX_TRC_JMP_BORING; + break; + default: + ppIRJumpKind(i->RISCV64in.XAssisted.jk); + vpanic("emit_RISCV64Instr.RISCV64in_XAssisted: unexpected jump kind"); + } + vassert(trcval != 0); + p = imm64_to_ireg(p, 8 /*x8/s0*/, trcval); + + /* li t0, VG_(disp_cp_xassisted) */ + p = imm64_to_ireg(p, 5 /*x5/t0*/, (ULong)disp_cp_xassisted); + + /* c.jr 0(t0) */ + p = emit_CR(p, 0b10, 0 /*x0/zero*/, 5 /*x5/t0*/, 0b1000); + + /* Fix up the conditional jump, if there was one. */ + if (!hregIsInvalid(i->RISCV64in.XAssisted.cond)) { + /* beq cond, zero, delta */ + UInt cond = iregEnc(i->RISCV64in.XAssisted.cond); + UInt delta = p - ptmp; + /* delta_min = 4 (beq) + 4 (sd) + 2 (c.li) + 2 (c.li) + 2 (c.jr) + = 14 */ + vassert(delta >= 14 && delta < 4096 && (delta & 1) == 0); + UInt imm12_1 = (delta >> 1) & 0xfff; + + emit_B(ptmp, 0b1100011, imm12_1, 0b000, cond, 0 /*x0/zero*/); + } + + goto done; + } + + case RISCV64in_EvCheck: { + /* lw t0, soff12_amCounter(base_amCounter) + c.addiw t0, -1 + sw t0, soff12_amCounter(base_amCounter) + bge t0, zero, 1f + ld t0, soff12_amFailAddr(base_amFailAddr) + c.jr 0(t0) + 1: + */ + UInt base_amCounter = iregEnc(i->RISCV64in.EvCheck.base_amCounter); + Int soff12_amCounter = i->RISCV64in.EvCheck.soff12_amCounter; + vassert(soff12_amCounter >= -2048 && soff12_amCounter < 2048); + UInt imm11_0_amCounter = soff12_amCounter & 0xfff; + + UInt base_amFailAddr = iregEnc(i->RISCV64in.EvCheck.base_amFailAddr); + Int soff12_amFailAddr = i->RISCV64in.EvCheck.soff12_amFailAddr; + vassert(soff12_amFailAddr >= -2048 && soff12_amFailAddr < 2048); + UInt imm11_0_amFailAddr = soff12_amFailAddr & 0xfff; + + p = emit_I(p, 0b0000011, 5 /*x5/t0*/, 0b010, base_amCounter, + imm11_0_amCounter); + p = emit_CI(p, 0b01, -1 & 0x3f, 5 /*x5/t0*/, 0b001); + p = emit_S(p, 0b0100011, imm11_0_amCounter, 0b010, base_amCounter, + 5 /*x5/t0*/); + p = emit_B(p, 0b1100011, (10 >> 1) & 0xfff, 0b101, 5 /*x5/t0*/, + 0 /*x0/zero*/); + p = emit_I(p, 0b0000011, 5 /*x5/t0*/, 0b011, base_amFailAddr, + imm11_0_amFailAddr); + p = emit_CR(p, 0b10, 0 /*x0/zero*/, 5 /*x5/t0*/, 0b1000); + + /* Crosscheck. */ + vassert(evCheckSzB_RISCV64() == p - buf); + goto done; + } + + case RISCV64in_ProfInc: { + /* Generate a code template to increment a memory location whose address + will be known later as an immediate value. This code template will be + patched by LibVEX_PatchProfInc() once the memory location is known. For + now do this with address == 0x0000'6555'7555'8566. 
+ + li t1, 0x655575558566 + ld t0, 0(t1) + c.addi t0, t0, 1 + sd t0, 0(t1) + */ + p = addr48_to_ireg_EXACTLY_18B(p, 6 /*x6/t1*/, 0x655575558566ULL); + p = emit_I(p, 0b0000011, 5 /*x5/t0*/, 0b011, 6 /*x6/t1*/, 0); + p = emit_CI(p, 0b01, 1, 5 /*x5/t0*/, 0b000); + p = emit_S(p, 0b0100011, 0, 0b011, 6 /*x6/t1*/, 5 /*x5/t0*/); + /* Tell the caller .. */ + vassert(!*is_profInc); + *is_profInc = True; + goto done; + } + + default: + goto bad; + } + +bad: + ppRISCV64Instr(i, mode64); + vpanic("emit_RISCV64Instr"); + /*NOTREACHED*/ + +done: + vassert(p - &buf[0] <= 44); + return p - &buf[0]; +} + +/* Return the number of bytes emitted for an RISCV64in_EvCheck, as produced by + emit_RISCV64Instr(). */ +Int evCheckSzB_RISCV64(void) { return 20; } + +/* NB: what goes on here has to be very closely coordinated with the emitInstr + case for XDirect, above. */ +VexInvalRange chainXDirect_RISCV64(VexEndness endness_host, + void* place_to_chain, + const void* disp_cp_chain_me_EXPECTED, + const void* place_to_jump_to) +{ + vassert(endness_host == VexEndnessLE); + + /* What we're expecting to see is: + lui t0, disp_cp_chain_me_to_EXPECTED[47:28]' + addiw t0, t0, disp_cp_chain_me_to_EXPECTED[27:16]' + c.slli t0, 12 + addi t0, t0, disp_cp_chain_me_to_EXPECTED[15:4]' + c.slli t0, 4 + c.addi t0, disp_cp_chain_me_to_EXPECTED[3:0]' + c.jalr 0(t0) + viz + <18 bytes generated by addr48_to_ireg_EXACTLY_18B> + 82 92 + */ + UChar* p = place_to_chain; + vassert(((HWord)p & 1) == 0); + vassert(is_addr48_to_ireg_EXACTLY_18B(p, 5 /*x5/t0*/, + (ULong)disp_cp_chain_me_EXPECTED)); + vassert(p[18] == 0x82 && p[19] == 0x92); + + /* And what we want to change it to is: + lui t0, place_to_jump[47:28]' + addiw t0, t0, place_to_jump[27:16]' + c.slli t0, 12 + addi t0, t0, place_to_jump[15:4]' + c.slli t0, 4 + c.addi t0, place_to_jump[3:0]' + c.jr 0(t0) + viz + <18 bytes generated by addr48_to_ireg_EXACTLY_18B> + 82 82 + + The replacement has the same length as the original. + */ + (void)addr48_to_ireg_EXACTLY_18B(p, 5 /*x5/t0*/, (ULong)place_to_jump_to); + p[18] = 0x82; + p[19] = 0x82; + + VexInvalRange vir = {(HWord)p, 20}; + return vir; +} + +/* NB: what goes on here has to be very closely coordinated with the emitInstr + case for XDirect, above. */ +VexInvalRange unchainXDirect_RISCV64(VexEndness endness_host, + void* place_to_unchain, + const void* place_to_jump_to_EXPECTED, + const void* disp_cp_chain_me) +{ + vassert(endness_host == VexEndnessLE); + + /* What we're expecting to see is: + lui t0, place_to_jump_to_EXPECTED[47:28]' + addiw t0, t0, place_to_jump_to_EXPECTED[27:16]' + c.slli t0, 12 + addi t0, t0, place_to_jump_to_EXPECTED[15:4]' + c.slli t0, 4 + c.addi t0, place_to_jump_to_EXPECTED[3:0]' + c.jr 0(t0) + viz + <18 bytes generated by addr48_to_ireg_EXACTLY_18B> + 82 82 + */ + UChar* p = place_to_unchain; + vassert(((HWord)p & 1) == 0); + vassert(is_addr48_to_ireg_EXACTLY_18B(p, 5 /*x5/t0*/, + (ULong)place_to_jump_to_EXPECTED)); + vassert(p[18] == 0x82 && p[19] == 0x82); + + /* And what we want to change it to is: + lui t0, disp_cp_chain_me[47:28]' + addiw t0, t0, disp_cp_chain_me[27:16]' + c.slli t0, 12 + addi t0, t0, disp_cp_chain_me[15:4]' + c.slli t0, 4 + c.addi t0, disp_cp_chain_me[3:0]' + c.jalr 0(t0) + viz + <18 bytes generated by addr48_to_ireg_EXACTLY_18B> + 82 92 + + The replacement has the same length as the original. 
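+ + As a hand-check of the two trailing bytes: c.jr t0 is the CR-format + encoding funct4=0b1000, rd/rs1=x5, rs2=x0, op=0b10, i.e. 0x9282 with + funct4=0b1001 for c.jalr and 0x8282 with funct4=0b1000 for c.jr, so the + little-endian byte pairs are 82 92 (c.jalr) and 82 82 (c.jr).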
+ */ + (void)addr48_to_ireg_EXACTLY_18B(p, 5 /*x5/t0*/, (ULong)disp_cp_chain_me); + p[18] = 0x82; + p[19] = 0x92; + + VexInvalRange vir = {(HWord)p, 20}; + return vir; +} + +/* Patch the counter address into a profile inc point, as previously created by + the RISCV64in_ProfInc case for emit_RISCV64Instr(). */ +VexInvalRange patchProfInc_RISCV64(VexEndness endness_host, + void* place_to_patch, + const ULong* location_of_counter) +{ + vassert(sizeof(ULong*) == 8); + vassert(endness_host == VexEndnessLE); + UChar* p = place_to_patch; + vassert(((HWord)p & 3) == 0); + vassert(is_addr48_to_ireg_EXACTLY_18B(p, 6 /*x6/t1*/, 0x655575558566ULL)); + vassert(p[18] == 0x83 && p[19] == 0x32 && p[20] == 0x03 && p[21] == 0x00); + vassert(p[22] == 0x85 && p[23] == 0x02); + vassert(p[24] == 0x23 && p[25] == 0x30 && p[26] == 0x53 && p[27] == 0x00); + (void)addr48_to_ireg_EXACTLY_18B(p, 6 /*x6/t1*/, (ULong)location_of_counter); + VexInvalRange vir = {(HWord)p, 28}; + return vir; +} + +/*--------------------------------------------------------------------*/ +/*--- end host_riscv64_defs.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/priv/host_riscv64_defs.h b/priv/host_riscv64_defs.h new file mode 100644 index 000000000..1990fe3f5 --- /dev/null +++ b/priv/host_riscv64_defs.h @@ -0,0 +1,644 @@ + +/*--------------------------------------------------------------------*/ +/*--- begin host_riscv64_defs.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2020-2023 Petr Pavlu + petr.pavlu@dagobah.cz + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see . + + The GNU General Public License is contained in the file COPYING.
+*/ + +#ifndef __VEX_HOST_RISCV64_DEFS_H +#define __VEX_HOST_RISCV64_DEFS_H + +#include "libvex.h" +#include "libvex_basictypes.h" + +#include "host_generic_regs.h" + +/*------------------------------------------------------------*/ +/*--- Registers ---*/ +/*------------------------------------------------------------*/ + +#define ST_IN static inline +ST_IN HReg hregRISCV64_x18(void) { return mkHReg(False, HRcInt64, 18, 0); } +ST_IN HReg hregRISCV64_x19(void) { return mkHReg(False, HRcInt64, 19, 1); } +ST_IN HReg hregRISCV64_x20(void) { return mkHReg(False, HRcInt64, 20, 2); } +ST_IN HReg hregRISCV64_x21(void) { return mkHReg(False, HRcInt64, 21, 3); } +ST_IN HReg hregRISCV64_x22(void) { return mkHReg(False, HRcInt64, 22, 4); } +ST_IN HReg hregRISCV64_x23(void) { return mkHReg(False, HRcInt64, 23, 5); } +ST_IN HReg hregRISCV64_x24(void) { return mkHReg(False, HRcInt64, 24, 6); } +ST_IN HReg hregRISCV64_x25(void) { return mkHReg(False, HRcInt64, 25, 7); } +ST_IN HReg hregRISCV64_x26(void) { return mkHReg(False, HRcInt64, 26, 8); } +ST_IN HReg hregRISCV64_x27(void) { return mkHReg(False, HRcInt64, 27, 9); } + +ST_IN HReg hregRISCV64_x10(void) { return mkHReg(False, HRcInt64, 10, 10); } +ST_IN HReg hregRISCV64_x11(void) { return mkHReg(False, HRcInt64, 11, 11); } +ST_IN HReg hregRISCV64_x12(void) { return mkHReg(False, HRcInt64, 12, 12); } +ST_IN HReg hregRISCV64_x13(void) { return mkHReg(False, HRcInt64, 13, 13); } +ST_IN HReg hregRISCV64_x14(void) { return mkHReg(False, HRcInt64, 14, 14); } +ST_IN HReg hregRISCV64_x15(void) { return mkHReg(False, HRcInt64, 15, 15); } +ST_IN HReg hregRISCV64_x16(void) { return mkHReg(False, HRcInt64, 16, 16); } +ST_IN HReg hregRISCV64_x17(void) { return mkHReg(False, HRcInt64, 17, 17); } + +ST_IN HReg hregRISCV64_f0(void) { return mkHReg(False, HRcFlt64, 0, 18); } +ST_IN HReg hregRISCV64_f1(void) { return mkHReg(False, HRcFlt64, 1, 19); } +ST_IN HReg hregRISCV64_f2(void) { return mkHReg(False, HRcFlt64, 2, 20); } +ST_IN HReg hregRISCV64_f3(void) { return mkHReg(False, HRcFlt64, 3, 21); } +ST_IN HReg hregRISCV64_f4(void) { return mkHReg(False, HRcFlt64, 4, 22); } +ST_IN HReg hregRISCV64_f5(void) { return mkHReg(False, HRcFlt64, 5, 23); } +ST_IN HReg hregRISCV64_f6(void) { return mkHReg(False, HRcFlt64, 6, 24); } +ST_IN HReg hregRISCV64_f7(void) { return mkHReg(False, HRcFlt64, 7, 25); } + +ST_IN HReg hregRISCV64_f10(void) { return mkHReg(False, HRcFlt64, 10, 26); } +ST_IN HReg hregRISCV64_f11(void) { return mkHReg(False, HRcFlt64, 11, 27); } +ST_IN HReg hregRISCV64_f12(void) { return mkHReg(False, HRcFlt64, 12, 28); } +ST_IN HReg hregRISCV64_f13(void) { return mkHReg(False, HRcFlt64, 13, 29); } +ST_IN HReg hregRISCV64_f14(void) { return mkHReg(False, HRcFlt64, 14, 30); } +ST_IN HReg hregRISCV64_f15(void) { return mkHReg(False, HRcFlt64, 15, 31); } +ST_IN HReg hregRISCV64_f16(void) { return mkHReg(False, HRcFlt64, 16, 32); } +ST_IN HReg hregRISCV64_f17(void) { return mkHReg(False, HRcFlt64, 17, 33); } + +ST_IN HReg hregRISCV64_f28(void) { return mkHReg(False, HRcFlt64, 28, 34); } +ST_IN HReg hregRISCV64_f29(void) { return mkHReg(False, HRcFlt64, 29, 35); } +ST_IN HReg hregRISCV64_f30(void) { return mkHReg(False, HRcFlt64, 30, 36); } +ST_IN HReg hregRISCV64_f31(void) { return mkHReg(False, HRcFlt64, 31, 37); } + +ST_IN HReg hregRISCV64_x0(void) { return mkHReg(False, HRcInt64, 0, 38); } +ST_IN HReg hregRISCV64_x2(void) { return mkHReg(False, HRcInt64, 2, 39); } +ST_IN HReg hregRISCV64_x8(void) { return mkHReg(False, HRcInt64, 8, 40); } +#undef ST_IN + +/* 
Number of registers used for argument passing in function calls. */ +#define RISCV64_N_ARGREGS 8 /* x10/a0 .. x17/a7 */ +#define RISCV64_N_FARGREGS 8 /* f10/fa0 .. f17/fa7 */ + +/*------------------------------------------------------------*/ +/*--- Instructions ---*/ +/*------------------------------------------------------------*/ + +/* RISCV64in_ALU sub-types. */ +typedef enum { + RISCV64op_ADD = 0x100, /* Addition of two registers. */ + RISCV64op_SUB, /* Subtraction of one register from another. */ + RISCV64op_ADDW, /* 32-bit addition of two registers. */ + RISCV64op_SUBW, /* 32-bit subtraction of one register from another. */ + RISCV64op_XOR, /* Bitwise XOR of two registers. */ + RISCV64op_OR, /* Bitwise OR of two registers. */ + RISCV64op_AND, /* Bitwise AND of two registers. */ + RISCV64op_SLL, /* Logical left shift on a register. */ + RISCV64op_SRL, /* Logical right shift on a register. */ + RISCV64op_SRA, /* Arithmetic right shift on a register. */ + RISCV64op_SLLW, /* 32-bit logical left shift on a register. */ + RISCV64op_SRLW, /* 32-bit logical right shift on a register. */ + RISCV64op_SRAW, /* 32-bit arithmetic right shift on a register. */ + RISCV64op_SLT, /* Signed comparison of two registers. */ + RISCV64op_SLTU, /* Unsigned comparison of two registers. */ + RISCV64op_MUL, /* Multiplication of two registers, producing the + lower 64 bits. */ + RISCV64op_MULH, /* Signed multiplication of two registers, producing + the upper 64 bits. */ + RISCV64op_MULHU, /* Unsigned multiplication of two registers, producing + the upper 64 bits. */ + RISCV64op_DIV, /* Signed division of one register by another. */ + RISCV64op_DIVU, /* Unsigned division of one register by another. */ + RISCV64op_REM, /* Remainder from signed division of one register by + another. */ + RISCV64op_REMU, /* Remainder from unsigned division of one register by + another. */ + RISCV64op_MULW, /* 32-bit multiplication of two registers, producing + the lower 32 bits. */ + RISCV64op_DIVW, /* 32-bit signed division of one register by + another. */ + RISCV64op_DIVUW, /* 32-bit unsigned division of one register by + another. */ + RISCV64op_REMW, /* Remainder from 32-bit signed division of one + register by another. */ + RISCV64op_REMUW, /* Remainder from 32-bit unsigned division of one + register by another. */ +} RISCV64ALUOp; + +/* RISCV64in_ALUImm sub-types. */ +typedef enum { + RISCV64op_ADDI = 0x200, /* Addition of a register and a sx-12-bit + immediate. */ + RISCV64op_ADDIW, /* 32-bit addition of a register and a sx-12-bit + immediate. */ + RISCV64op_XORI, /* Bitwise XOR of a register and a sx-12-bit + immediate. */ + RISCV64op_ANDI, /* Bitwise AND of a register and a sx-12-bit + immediate. */ + RISCV64op_SLLI, /* Logical left shift on a register by a 6-bit + immediate. */ + RISCV64op_SRLI, /* Logical right shift on a register by a 6-bit + immediate. */ + RISCV64op_SRAI, /* Arithmetic right shift on a register by a 6-bit + immediate. */ + RISCV64op_SLTIU, /* Unsigned comparison of a register and a sx-12-bit + immediate. */ +} RISCV64ALUImmOp; + +/* RISCV64in_Load sub-types. */ +typedef enum { + RISCV64op_LD = 0x300, /* 64-bit load. */ + RISCV64op_LW, /* sx-32-to-64-bit load. */ + RISCV64op_LH, /* sx-16-to-64-bit load. */ + RISCV64op_LB, /* sx-8-to-64-bit load. */ +} RISCV64LoadOp; + +/* RISCV64in_Store sub-types. */ +typedef enum { + RISCV64op_SD = 0x400, /* 64-bit store. */ + RISCV64op_SW, /* 32-bit store. */ + RISCV64op_SH, /* 16-bit store. */ + RISCV64op_SB, /* 8-bit store. 
*/ +} RISCV64StoreOp; + +/* RISCV64in_LoadR sub-types. */ +typedef enum { + RISCV64op_LR_W = 0x500, /* sx-32-to-64-bit load-reserved. */ +} RISCV64LoadROp; + +/* RISCV64in_StoreC sub-types. */ +typedef enum { + RISCV64op_SC_W = 0x600, /* 32-bit store-conditional. */ +} RISCV64StoreCOp; + +/* RISCV64in_FpUnary sub-types. */ +typedef enum { + RISCV64op_FSQRT_S = 0x700, /* Square root of a 32-bit floating-point + register. */ + RISCV64op_FSQRT_D, /* Square root of a 64-bit floating-point + register. */ +} RISCV64FpUnaryOp; + +/* RISCV64in_FpBinary sub-types. */ +typedef enum { + RISCV64op_FADD_S = 0x800, /* Addition of two 32-bit floating-point + registers. */ + RISCV64op_FMUL_S, /* Multiplication of two 32-bit floating-point + registers. */ + RISCV64op_FDIV_S, /* Division of a 32-bit floating-point register by + another. */ + RISCV64op_FSGNJN_S, /* Copy of a 32-bit floating-point register to + another with the sign bit taken from the second + input and negated. */ + RISCV64op_FSGNJX_S, /* Copy of a 32-bit floating-point register to + another with the sign bit XOR'ed from the second + input. */ + RISCV64op_FMIN_S, /* Select minimum-number of two 32-bit + floating-point registers. */ + RISCV64op_FMAX_S, /* Select maximum-number of two 32-bit + floating-point registers. */ + RISCV64op_FADD_D, /* Addition of two 64-bit floating-point + registers. */ + RISCV64op_FSUB_D, /* Subtraction of one 64-bit floating-point + register from another. */ + RISCV64op_FMUL_D, /* Multiplication of two 64-bit floating-point + registers. */ + RISCV64op_FDIV_D, /* Division of a 64-bit floating-point register by + another. */ + RISCV64op_FSGNJN_D, /* Copy of a 64-bit floating-point register to + another with the sign bit taken from the second + input and negated. */ + RISCV64op_FSGNJX_D, /* Copy of a 64-bit floating-point register to + another with the sign bit XOR'ed from the second + input. */ + RISCV64op_FMIN_D, /* Select minimum-number of two 64-bit + floating-point registers. */ + RISCV64op_FMAX_D, /* Select maximum-number of two 64-bit + floating-point registers. */ +} RISCV64FpBinaryOp; + +/* RISCV64in_FpTernary sub-types. */ +typedef enum { + RISCV64op_FMADD_S = 0x900, /* Fused multiply-add of 32-bit floating-point + registers. */ + RISCV64op_FMADD_D, /* Fused multiply-add of 64-bit floating-point + registers. */ +} RISCV64FpTernaryOp; + +/* RISCV64in_FpMove sub-types. */ +typedef enum { + RISCV64op_FMV_X_W = 0xa00, /* Move as-is a 32-bit value from a floating-point + register to an integer register. */ + RISCV64op_FMV_W_X, /* Move as-is a 32-bit value from an integer + register to a floating-point register. */ + RISCV64op_FMV_D, /* Copy one 64-bit floating-point register to + another. */ + RISCV64op_FMV_X_D, /* Move as-is a 64-bit value from a floating-point + register to an integer register. */ + RISCV64op_FMV_D_X, /* Move as-is a 64-bit value from an integer + register to a floating-point register. */ +} RISCV64FpMoveOp; + +/* RISCV64in_FpConvert sub-types. */ +typedef enum { + RISCV64op_FCVT_W_S = 0xb00, /* Convert a 32-bit floating-point number to + a 32-bit signed integer. */ + RISCV64op_FCVT_WU_S, /* Convert a 32-bit floating-point number to + a 32-bit unsigned integer. */ + RISCV64op_FCVT_S_W, /* Convert a 32-bit signed integer to a 32-bit + floating-point number. */ + RISCV64op_FCVT_S_WU, /* Convert a 32-bit unsigned integer to a 32-bit + floating-point number. */ + RISCV64op_FCVT_L_S, /* Convert a 32-bit floating-point number to + a 64-bit signed integer. 
*/ + RISCV64op_FCVT_LU_S, /* Convert a 32-bit floating-point number to + a 64-bit unsigned integer. */ + RISCV64op_FCVT_S_L, /* Convert a 64-bit signed integer to a 32-bit + floating-point number. */ + RISCV64op_FCVT_S_LU, /* Convert a 64-bit unsigned integer to a 32-bit + floating-point number. */ + RISCV64op_FCVT_S_D, /* Convert a 64-bit floating-point number to + a 32-bit floating-point number. */ + RISCV64op_FCVT_D_S, /* Convert a 32-bit floating-point number to + a 64-bit floating-point number. */ + RISCV64op_FCVT_W_D, /* Convert a 64-bit floating-point number to + a 32-bit signed integer. */ + RISCV64op_FCVT_WU_D, /* Convert a 64-bit floating-point number to + a 32-bit unsigned integer. */ + RISCV64op_FCVT_D_W, /* Convert a 32-bit signed integer to a 64-bit + floating-point number. */ + RISCV64op_FCVT_D_WU, /* Convert a 32-bit unsigned integer to a 64-bit + floating-point number. */ + RISCV64op_FCVT_L_D, /* Convert a 64-bit floating-point number to + a 64-bit signed integer. */ + RISCV64op_FCVT_LU_D, /* Convert a 64-bit floating-point number to + a 64-bit unsigned integer. */ + RISCV64op_FCVT_D_L, /* Convert a 64-bit signed integer to a 64-bit + floating-point number. */ + RISCV64op_FCVT_D_LU, /* Convert a 64-bit unsigned integer to a 64-bit + floating-point number. */ +} RISCV64FpConvertOp; + +/* RISCV64in_FpCompare sub-types. */ +typedef enum { + RISCV64op_FEQ_S = 0xc00, /* Equality comparison of two 32-bit floating-point + registers. */ + RISCV64op_FLT_S, /* Less-than comparison of two 32-bit floating-point + registers. */ + RISCV64op_FEQ_D, /* Equality comparison of two 64-bit floating-point + registers. */ + RISCV64op_FLT_D, /* Less-than comparison of two 64-bit floating-point + registers. */ +} RISCV64FpCompareOp; + +/* RISCV64in_FpLdSt sub-types. */ +typedef enum { + RISCV64op_FLW = 0xd00, /* 32-bit floating-point load. */ + RISCV64op_FLD, /* 64-bit floating-point load. */ + RISCV64op_FSW, /* 32-bit floating-point store. */ + RISCV64op_FSD, /* 64-bit floating-point store. */ +} RISCV64FpLdStOp; + +/* RISCV64in_CAS sub-types. */ +typedef enum { + RISCV64op_CAS_D = 0xe00, /* 64-bit compare-and-swap pseudoinstruction. */ + RISCV64op_CAS_W, /* 32-bit compare-and-swap pseudoinstruction. */ +} RISCV64CASOp; + +/* The kind of instructions. */ +typedef enum { + RISCV64in_LI = 0x52640000, /* Load immediate pseudoinstruction. */ + RISCV64in_MV, /* Copy one register to another. */ + RISCV64in_ALU, /* Computational binary instruction. */ + RISCV64in_ALUImm, /* Computational binary instruction, with + an immediate as the second input. */ + RISCV64in_Load, /* Load from memory (sign-extended). */ + RISCV64in_Store, /* Store to memory. */ + RISCV64in_LoadR, /* Load-reserved from memory (sign-extended). */ + RISCV64in_StoreC, /* Store-conditional to memory. */ + RISCV64in_CSRRW, /* Atomic swap of values in a CSR and an integer + register. */ + RISCV64in_FpUnary, /* Floating-point unary instruction. */ + RISCV64in_FpBinary, /* Floating-point binary instruction. */ + RISCV64in_FpTernary, /* Floating-point ternary instruction. */ + RISCV64in_FpMove, /* Floating-point move instruction. */ + RISCV64in_FpConvert, /* Floating-point convert instruction. */ + RISCV64in_FpCompare, /* Floating-point compare instruction. */ + RISCV64in_FpLdSt, /* Floating-point load/store instruction. */ + RISCV64in_CAS, /* Compare-and-swap pseudoinstruction. */ + RISCV64in_FENCE, /* Device I/O and memory fence. */ + RISCV64in_CSEL, /* Conditional-select pseudoinstruction. */ + RISCV64in_Call, /* Call pseudoinstruction. 
*/ + RISCV64in_XDirect, /* Direct transfer to guest address. */ + RISCV64in_XIndir, /* Indirect transfer to guest address. */ + RISCV64in_XAssisted, /* Assisted transfer to guest address. */ + RISCV64in_EvCheck, /* Event check. */ + RISCV64in_ProfInc /* 64-bit profile counter increment. */ +} RISCV64InstrTag; + +typedef struct { + RISCV64InstrTag tag; + union { + /* Load immediate pseudoinstruction. */ + struct { + HReg dst; + ULong imm64; + } LI; + /* Copy one register to another. */ + struct { + HReg dst; + HReg src; + } MV; + /* Computational binary instruction. */ + struct { + RISCV64ALUOp op; + HReg dst; + HReg src1; + HReg src2; + } ALU; + /* Computational binary instruction, with an immediate as the second + input. */ + struct { + RISCV64ALUImmOp op; + HReg dst; + HReg src; + Int imm12; /* simm12 or uimm6 */ + } ALUImm; + /* Load from memory (sign-extended). */ + struct { + RISCV64LoadOp op; + HReg dst; + HReg base; + Int soff12; /* -2048 .. +2047 */ + } Load; + /* Store to memory. */ + struct { + RISCV64StoreOp op; + HReg src; + HReg base; + Int soff12; /* -2048 .. +2047 */ + } Store; + /* Load-reserved from memory (sign-extended). */ + struct { + RISCV64LoadROp op; + HReg dst; + HReg addr; + } LoadR; + /* Store-conditional to memory. */ + struct { + RISCV64StoreCOp op; + HReg res; + HReg src; + HReg addr; + } StoreC; + /* Atomic swap of values in a CSR and an integer register. */ + struct { + HReg dst; + HReg src; + UInt csr; + } CSRRW; + /* Floating-point unary instruction. */ + struct { + RISCV64FpUnaryOp op; + HReg dst; + HReg src; + } FpUnary; + /* Floating-point binary instruction. */ + struct { + RISCV64FpBinaryOp op; + HReg dst; + HReg src1; + HReg src2; + } FpBinary; + /* Floating-point ternary instruction. */ + struct { + RISCV64FpTernaryOp op; + HReg dst; + HReg src1; + HReg src2; + HReg src3; + } FpTernary; + /* Floating-point move instruction. */ + struct { + RISCV64FpMoveOp op; + HReg dst; + HReg src; + } FpMove; + /* Floating-point convert instruction. */ + struct { + RISCV64FpConvertOp op; + HReg dst; + HReg src; + } FpConvert; + /* Floating-point compare instruction. */ + struct { + RISCV64FpCompareOp op; + HReg dst; + HReg src1; + HReg src2; + } FpCompare; + /* Floating-point load/store instruction. */ + struct { + RISCV64FpLdStOp op; + HReg reg; /* dst for load, src for store */ + HReg base; + Int soff12; /* -2048 .. +2047 */ + } FpLdSt; + /* Compare-and-swap pseudoinstruction. */ + struct { + RISCV64CASOp op; + HReg old; + HReg addr; + HReg expd; + HReg data; + } CAS; + /* Device I/O and memory fence. */ + struct { + } FENCE; + /* Conditional-select pseudoinstruction. */ + struct { + HReg dst; + HReg iftrue; + HReg iffalse; + HReg cond; + } CSEL; + /* Call pseudoinstruction. Call a target (an absolute address), on a given + condition register. */ + struct { + RetLoc rloc; /* Where the return value will be. */ + Addr64 target; /* Target address of the call. */ + HReg cond; /* Condition, can be INVALID_HREG for "always". */ + UChar nArgRegs; /* # regs carrying integer args: 0 .. 8 */ + UChar nFArgRegs; /* # regs carrying floating-point args: 0 .. 8 */ + } Call; + /* Update the guest pc value, then exit requesting to chain to it. May be + conditional. */ + struct { + Addr64 dstGA; /* Next guest address. */ + HReg base; /* Base to access the guest state. */ + Int soff12; /* Offset from the base register to access pc. */ + HReg cond; /* Condition, can be INVALID_HREG for "always". */ + Bool toFastEP; /* Chain to the slow or fast point? 
*/ + } XDirect; + /* Boring transfer to a guest address not known at JIT time. Not + chainable. May be conditional. */ + struct { + HReg dstGA; /* Next guest address. */ + HReg base; /* Base to access the guest state. */ + Int soff12; /* Offset from the base register to access pc. */ + HReg cond; /* Condition, can be INVALID_HREG for "always". */ + } XIndir; + /* Assisted transfer to a guest address, most general case. Not chainable. + May be conditional. */ + struct { + HReg dstGA; /* Next guest address. */ + HReg base; /* Base to access the guest state. */ + Int soff12; /* Offset from the base register to access pc. */ + HReg cond; /* Condition, can be INVALID_HREG for "always". */ + IRJumpKind jk; + } XAssisted; + /* Event check. */ + struct { + HReg base_amCounter; /* Base to access the guest state for + host_EvC_Counter. */ + Int soff12_amCounter; /* Offset from the base register to access + host_EvC_COUNTER. */ + HReg base_amFailAddr; /* Base to access the guest state for for + host_EvC_FAILADDR. */ + Int soff12_amFailAddr; /* Offset from the base register to access + host_EvC_FAILADDR. */ + } EvCheck; + /* 64-bit profile counter increment. */ + struct { + /* No fields. The address of the counter to inc is installed later, + post-translation, by patching it in, as it is not known at + translation time. */ + } ProfInc; + } RISCV64in; +} RISCV64Instr; + +RISCV64Instr* RISCV64Instr_LI(HReg dst, ULong imm64); +RISCV64Instr* RISCV64Instr_MV(HReg dst, HReg src); +RISCV64Instr* RISCV64Instr_ALU(RISCV64ALUOp op, HReg dst, HReg src1, HReg src2); +RISCV64Instr* +RISCV64Instr_ALUImm(RISCV64ALUImmOp op, HReg dst, HReg src, Int imm12); +RISCV64Instr* +RISCV64Instr_Load(RISCV64LoadOp op, HReg dst, HReg base, Int soff12); +RISCV64Instr* +RISCV64Instr_Store(RISCV64StoreOp op, HReg src, HReg base, Int soff12); +RISCV64Instr* RISCV64Instr_LoadR(RISCV64LoadROp op, HReg dst, HReg addr); +RISCV64Instr* +RISCV64Instr_StoreC(RISCV64StoreCOp op, HReg res, HReg src, HReg addr); +RISCV64Instr* RISCV64Instr_CSRRW(HReg dst, HReg src, UInt csr); +RISCV64Instr* RISCV64Instr_FpUnary(RISCV64FpUnaryOp op, HReg dst, HReg src); +RISCV64Instr* +RISCV64Instr_FpBinary(RISCV64FpBinaryOp op, HReg dst, HReg src1, HReg src2); +RISCV64Instr* RISCV64Instr_FpTernary( + RISCV64FpTernaryOp op, HReg dst, HReg src1, HReg src2, HReg src3); +RISCV64Instr* RISCV64Instr_FpMove(RISCV64FpMoveOp op, HReg dst, HReg src); +RISCV64Instr* RISCV64Instr_FpConvert(RISCV64FpConvertOp op, HReg dst, HReg src); +RISCV64Instr* +RISCV64Instr_FpCompare(RISCV64FpCompareOp op, HReg dst, HReg src1, HReg src2); +RISCV64Instr* +RISCV64Instr_FpLdSt(RISCV64FpLdStOp op, HReg reg, HReg base, Int soff12); +RISCV64Instr* +RISCV64Instr_CAS(RISCV64CASOp op, HReg old, HReg addr, HReg expd, HReg data); +RISCV64Instr* RISCV64Instr_FENCE(void); +RISCV64Instr* RISCV64Instr_CSEL(HReg dst, HReg iftrue, HReg iffalse, HReg cond); +RISCV64Instr* RISCV64Instr_Call( + RetLoc rloc, Addr64 target, HReg cond, UChar nArgRegs, UChar nFArgRegs); +RISCV64Instr* RISCV64Instr_XDirect( + Addr64 dstGA, HReg base, Int soff12, HReg cond, Bool toFastEP); +RISCV64Instr* RISCV64Instr_XIndir(HReg dstGA, HReg base, Int soff12, HReg cond); +RISCV64Instr* RISCV64Instr_XAssisted( + HReg dstGA, HReg base, Int soff12, HReg cond, IRJumpKind jk); +RISCV64Instr* RISCV64Instr_EvCheck(HReg base_amCounter, + Int soff12_amCounter, + HReg base_amFailAddr, + Int soff12_amFailAddr); +RISCV64Instr* RISCV64Instr_ProfInc(void); + +/*------------------------------------------------------------*/ +/*--- Misc 
helpers ---*/ +/*------------------------------------------------------------*/ + +static inline HReg get_baseblock_register(void) { return hregRISCV64_x8(); } +#define BASEBLOCK_OFFSET_ADJUSTMENT 2048 + +/*------------------------------------------------------------*/ +/* --- Interface exposed to VEX --- */ +/*------------------------------------------------------------*/ + +UInt ppHRegRISCV64(HReg reg); + +void ppRISCV64Instr(const RISCV64Instr* i, Bool mode64); + +const RRegUniverse* getRRegUniverse_RISCV64(void); + +/* Some functions that insulate the register allocator from details of the + underlying instruction set. */ +void getRegUsage_RISCV64Instr(HRegUsage* u, const RISCV64Instr* i, Bool mode64); +void mapRegs_RISCV64Instr(HRegRemap* m, RISCV64Instr* i, Bool mode64); + +void genSpill_RISCV64( + /*OUT*/ HInstr** i1, /*OUT*/ HInstr** i2, HReg rreg, Int offset, Bool); +void genReload_RISCV64( + /*OUT*/ HInstr** i1, /*OUT*/ HInstr** i2, HReg rreg, Int offset, Bool); +RISCV64Instr* genMove_RISCV64(HReg from, HReg to, Bool); + +Int emit_RISCV64Instr(/*MB_MOD*/ Bool* is_profInc, + UChar* buf, + Int nbuf, + const RISCV64Instr* i, + Bool mode64, + VexEndness endness_host, + const void* disp_cp_chain_me_to_slowEP, + const void* disp_cp_chain_me_to_fastEP, + const void* disp_cp_xindir, + const void* disp_cp_xassisted); + +/* Return the number of bytes of code needed for an event check. */ +Int evCheckSzB_RISCV64(void); + +/* Perform a chaining and unchaining of an XDirect jump. */ +VexInvalRange chainXDirect_RISCV64(VexEndness endness_host, + void* place_to_chain, + const void* disp_cp_chain_me_EXPECTED, + const void* place_to_jump_to); + +VexInvalRange unchainXDirect_RISCV64(VexEndness endness_host, + void* place_to_unchain, + const void* place_to_jump_to_EXPECTED, + const void* disp_cp_chain_me); + +/* Patch the counter location into an existing ProfInc point. */ +VexInvalRange patchProfInc_RISCV64(VexEndness endness_host, + void* place_to_patch, + const ULong* location_of_counter); + +HInstrArray* iselSB_RISCV64(const IRSB* bb, + VexArch arch_host, + const VexArchInfo* archinfo_host, + const VexAbiInfo* vbi, + Int offs_Host_EvC_Counter, + Int offs_Host_EvC_FailAddr, + Bool chainingAllowed, + Bool addProfInc, + Addr max_ga); + +#endif /* ndef __VEX_HOST_RISCV64_DEFS_H */ + +/*--------------------------------------------------------------------*/ +/*--- end host_riscv64_defs.h ---*/ +/*--------------------------------------------------------------------*/ diff --git a/priv/host_riscv64_isel.c b/priv/host_riscv64_isel.c new file mode 100644 index 000000000..123201a70 --- /dev/null +++ b/priv/host_riscv64_isel.c @@ -0,0 +1,2087 @@ + +/*--------------------------------------------------------------------*/ +/*--- begin host_riscv64_isel.c ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2020-2023 Petr Pavlu + petr.pavlu@dagobah.cz + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, see . + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "host_riscv64_defs.h" +#include "main_globals.h" +#include "main_util.h" + +/*------------------------------------------------------------*/ +/*--- ISelEnv ---*/ +/*------------------------------------------------------------*/ + +/* This carries around: + + - A mapping from IRTemp to IRType, giving the type of any IRTemp we might + encounter. This is computed before insn selection starts, and does not + change. + + - A mapping from IRTemp to HReg. This tells the insn selector which virtual + register is associated with each IRTemp temporary. This is computed before + insn selection starts, and does not change. We expect this mapping to map + precisely the same set of IRTemps as the type mapping does. + + - vregmap holds the primary register for the IRTemp. + - vregmapHI is only used for 128-bit integer-typed IRTemps. It holds the + identity of a second 64-bit virtual HReg, which holds the high + half of the value. + + - The code array, that is, the insns selected so far. + + - A counter, for generating new virtual registers. + + - The host hardware capabilities word. This is set at the start and does not + change. + + - A Bool for indicating whether we may generate chain-me instructions for + control flow transfers, or whether we must use XAssisted. + + - The maximum guest address of any guest insn in this block. Actually, the + address of the highest-addressed byte from any insn in this block. Is set + at the start and does not change. This is used for detecting jumps which + are definitely forward-edges from this block, and therefore can be made + (chained) to the fast entry point of the destination, thereby avoiding the + destination's event check. + + - An IRExpr*, which may be NULL, holding the IR expression (an + IRRoundingMode-encoded value) to which the FPU's rounding mode was most + recently set. Setting to NULL is always safe. Used to avoid redundant + settings of the FPU's rounding mode, as described in + set_fcsr_rounding_mode() below. + + Note, this is all (well, mostly) host-independent. +*/ + +typedef struct { + /* Constant -- are set at the start and do not change. */ + IRTypeEnv* type_env; + + HReg* vregmap; + HReg* vregmapHI; + Int n_vregmap; + + UInt hwcaps; + + Bool chainingAllowed; + Addr64 max_ga; + + /* These are modified as we go along. */ + HInstrArray* code; + Int vreg_ctr; + + IRExpr* previous_rm; +} ISelEnv; + +static HReg lookupIRTemp(ISelEnv* env, IRTemp tmp) +{ + vassert(tmp >= 0); + vassert(tmp < env->n_vregmap); + return env->vregmap[tmp]; +} + +static void addInstr(ISelEnv* env, RISCV64Instr* instr) +{ + addHInstr(env->code, instr); + if (vex_traceflags & VEX_TRACE_VCODE) { + ppRISCV64Instr(instr, True /*mode64*/); + vex_printf("\n"); + } +} + +static HReg newVRegI(ISelEnv* env) +{ + HReg reg = mkHReg(True /*virtual*/, HRcInt64, 0, env->vreg_ctr); + env->vreg_ctr++; + return reg; +} + +static HReg newVRegF(ISelEnv* env) +{ + HReg reg = mkHReg(True /*virtual*/, HRcFlt64, 0, env->vreg_ctr); + env->vreg_ctr++; + return reg; +} + +/*------------------------------------------------------------*/ +/*--- ISEL: Forward declarations ---*/ +/*------------------------------------------------------------*/ + +/* These are organised as iselXXX and iselXXX_wrk pairs. The iselXXX_wrk do the + real work, but are not to be called directly. 
For each XXX, iselXXX calls its + iselXXX_wrk counterpart, then checks that all returned registers are virtual. + You should not call the _wrk version directly. */ + +static HReg iselIntExpr_R(ISelEnv* env, IRExpr* e); +static void iselInt128Expr(HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e); +static HReg iselFltExpr(ISelEnv* env, IRExpr* e); + +/*------------------------------------------------------------*/ +/*--- ISEL: FP rounding mode helpers ---*/ +/*------------------------------------------------------------*/ + +/* Set the FP rounding mode: 'mode' is an I32-typed expression denoting a value + of IRRoundingMode. Set the fcsr RISC-V register to have the same rounding. + + All attempts to set the rounding mode have to be routed through this + function for things to work properly. Refer to the comment in the AArch64 + backend for set_FPCR_rounding_mode() how the mechanism relies on the SSA + property of IR and CSE. +*/ +static void set_fcsr_rounding_mode(ISelEnv* env, IRExpr* mode) +{ + vassert(typeOfIRExpr(env->type_env, mode) == Ity_I32); + + /* Do we need to do anything? */ + if (env->previous_rm && env->previous_rm->tag == Iex_RdTmp && + mode->tag == Iex_RdTmp && + env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) { + /* No - setting it to what it was before. */ + vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32); + return; + } + + /* No luck - we better set it, and remember what we set it to. */ + env->previous_rm = mode; + + /* + rounding mode | IR | RISC-V + --------------------------------------------- + to nearest, ties to even | 0000 | 000 + to -infinity | 0001 | 011 + to +infinity | 0010 | 010 + to zero | 0011 | 001 + to nearest, ties away from 0 | 0100 | 100 + prepare for shorter precision | 0101 | 111 + to away from 0 | 0110 | 111 + to nearest, ties towards 0 | 0111 | 111 + invalid | 1000 | 111 + + All rounding modes not supported on RISC-V are mapped to 111 which is the + dynamic mode that is always invalid in fcsr and raises an illegal + instruction exception. + + The mapping can be implemented using the following transformation: + t0 = 30 >> rm_IR + t1 = t0 & 19 + t2 = t0 + 7 + t3 = t1 + t2 + fcsr_rm_RISCV = t3 >> t1 + */ + HReg rm_IR = iselIntExpr_R(env, mode); + HReg imm_30 = newVRegI(env); + addInstr(env, RISCV64Instr_LI(imm_30, 30)); + HReg t0 = newVRegI(env); + addInstr(env, RISCV64Instr_ALU(RISCV64op_SRL, t0, imm_30, rm_IR)); + HReg t1 = newVRegI(env); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_ANDI, t1, t0, 19)); + HReg t2 = newVRegI(env); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_ADDI, t2, t0, 7)); + HReg t3 = newVRegI(env); + addInstr(env, RISCV64Instr_ALU(RISCV64op_ADD, t3, t1, t2)); + HReg fcsr_rm_RISCV = newVRegI(env); + addInstr(env, RISCV64Instr_ALU(RISCV64op_SRL, fcsr_rm_RISCV, t3, t1)); + addInstr(env, + RISCV64Instr_CSRRW(hregRISCV64_x0(), fcsr_rm_RISCV, 0x002 /*frm*/)); +} + +/*------------------------------------------------------------*/ +/*--- ISEL: Function call helpers ---*/ +/*------------------------------------------------------------*/ + +/* Used only in doHelperCall(). See the big comment in doHelperCall() regarding + handling of register-parameter arguments. This function figures out whether + evaluation of an expression might require use of a fixed register. If in + doubt return True (safe but suboptimal). 
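As a standalone cross-check of the frm mapping arithmetic used by set_fcsr_rounding_mode() above, the following sketch reproduces the t0..t3 transformation in plain C and verifies it against the table in the comment. It is illustrative only, not part of the patch, and the function name is invented for the example.

#include <assert.h>

/* Same transformation as emitted above: t0 = 30 >> rm; t1 = t0 & 19;
   t2 = t0 + 7; t3 = t1 + t2; frm = t3 >> t1. */
static unsigned ir_rm_to_frm(unsigned rm_IR)
{
   unsigned t0 = 30u >> rm_IR;
   unsigned t1 = t0 & 19u;
   unsigned t2 = t0 + 7u;
   unsigned t3 = t1 + t2;
   return t3 >> t1;
}

int main(void)
{
   assert(ir_rm_to_frm(0) == 0); /* to nearest, ties to even -> 000 */
   assert(ir_rm_to_frm(1) == 3); /* to -infinity             -> 011 */
   assert(ir_rm_to_frm(2) == 2); /* to +infinity             -> 010 */
   assert(ir_rm_to_frm(3) == 1); /* to zero                  -> 001 */
   assert(ir_rm_to_frm(4) == 4); /* ties away from 0         -> 100 */
   assert(ir_rm_to_frm(5) == 7); /* unsupported modes map to 111    */
   assert(ir_rm_to_frm(8) == 7); /* invalid also maps to 111        */
   return 0;
}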
+*/ +static Bool mightRequireFixedRegs(IRExpr* e) +{ + if (UNLIKELY(is_IRExpr_VECRET_or_GSPTR(e))) { + /* These are always "safe" -- either a copy of x2/sp in some arbitrary + vreg, or a copy of x8/s0, respectively. */ + return False; + } + /* Else it's a "normal" expression. */ + switch (e->tag) { + case Iex_RdTmp: + case Iex_Const: + case Iex_Get: + return False; + default: + return True; + } +} + +/* Do a complete function call. |guard| is a Ity_Bit expression indicating + whether or not the call happens. If guard==NULL, the call is unconditional. + |retloc| is set to indicate where the return value is after the call. The + caller (of this fn) must generate code to add |stackAdjustAfterCall| to the + stack pointer after the call is done. Returns True iff it managed to handle + this combination of arg/return types, else returns False. */ +static Bool doHelperCall(/*OUT*/ UInt* stackAdjustAfterCall, + /*OUT*/ RetLoc* retloc, + ISelEnv* env, + IRExpr* guard, + IRCallee* cee, + IRType retTy, + IRExpr** args) +{ + /* Set default returns. We'll update them later if needed. */ + *stackAdjustAfterCall = 0; + *retloc = mk_RetLoc_INVALID(); + + /* Marshal args for a call and do the call. + + This function only deals with a limited set of possibilities, which cover + all helpers in practice. The restrictions are that only the following + arguments are supported: + * RISCV64_N_REGPARMS x Ity_I32/Ity_I64 values, passed in x10/a0 .. x17/a7, + * RISCV64_N_FREGPARMS x Ity_F32/Ity_F64 values, passed in f10/fa0 .. + f17/fa7. + + Note that the cee->regparms field is meaningless on riscv64 hosts (since + we only implement one calling convention) and so we always ignore it. + + The return type can be I{8,16,32,64} or V128. In the V128 case, it is + expected that |args| will contain the special node IRExpr_VECRET(), in + which case this routine generates code to allocate space on the stack for + the vector return value. Since we are not passing any scalars on the + stack, it is enough to preallocate the return space before marshalling any + arguments, in this case. + + |args| may also contain IRExpr_GSPTR(), in which case the value in the + guest state pointer register minus BASEBLOCK_OFFSET_ADJUSTMENT is passed + as the corresponding argument. + + Generating code which is both efficient and correct when parameters are to + be passed in registers is difficult, for the reasons elaborated in detail + in comments attached to doHelperCall() in VEX/priv/host_x86_isel.c. Here, + we use a variant of the method described in those comments. + + The problem is split into two cases: the fast scheme and the slow scheme. + In the fast scheme, arguments are computed directly into the target (real) + registers. This is only safe when we can be sure that computation of each + argument will not trash any real registers set by computation of any other + argument. + + In the slow scheme, all args are first computed into vregs, and once they + are all done, they are moved to the relevant real regs. This always gives + correct code, but it also gives a bunch of vreg-to-rreg moves which are + usually redundant but are hard for the register allocator to get rid of. + + To decide which scheme to use, all argument expressions are first + examined. If they are all so simple that it is clear they will be + evaluated without use of any fixed registers, use the fast scheme, else + use the slow scheme. 
Note also that only unconditional calls may use the + fast scheme, since having to compute a condition expression could itself + trash real registers. + + Note this requires being able to examine an expression and determine + whether or not evaluation of it might use a fixed register. That requires + knowledge of how the rest of this insn selector works. Currently just the + following 3 are regarded as safe -- hopefully they cover the majority of + arguments in practice: IRExpr_RdTmp, IRExpr_Const, IRExpr_Get. + */ + + /* These are used for cross-checking that IR-level constraints on the use of + IRExpr_VECRET() and IRExpr_GSPTR() are observed. */ + UInt nVECRETs = 0; + UInt nGSPTRs = 0; + + UInt n_args = 0; + for (UInt i = 0; args[i] != NULL; i++) { + IRExpr* arg = args[i]; + if (UNLIKELY(arg->tag == Iex_VECRET)) + nVECRETs++; + else if (UNLIKELY(arg->tag == Iex_GSPTR)) + nGSPTRs++; + n_args++; + } + + /* If this fails, the IR is ill-formed. */ + vassert(nGSPTRs == 0 || nGSPTRs == 1); + + /* If we have a VECRET, allocate space on the stack for the return value, and + record the stack pointer after that. */ + HReg r_vecRetAddr = INVALID_HREG; + if (nVECRETs == 1) { + vassert(retTy == Ity_V128 || retTy == Ity_V256); + r_vecRetAddr = newVRegI(env); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_ADDI, hregRISCV64_x2(), + hregRISCV64_x2(), + retTy == Ity_V128 ? -16 : -32)); + addInstr(env, RISCV64Instr_MV(r_vecRetAddr, hregRISCV64_x2())); + } else { + /* If either of these fail, the IR is ill-formed. */ + vassert(retTy != Ity_V128 && retTy != Ity_V256); + vassert(nVECRETs == 0); + } + + /* First decide which scheme (slow or fast) is to be used. First assume the + fast scheme, and select slow if any contraindications (wow) appear. */ + Bool go_fast = True; + + /* We'll need space on the stack for the return value. Avoid possible + complications with nested calls by using the slow scheme. */ + if (retTy == Ity_V128 || retTy == Ity_V256) + go_fast = False; + + if (go_fast && guard != NULL) { + if (guard->tag == Iex_Const && guard->Iex.Const.con->tag == Ico_U1 && + guard->Iex.Const.con->Ico.U1 == True) { + /* Unconditional. */ + } else { + /* Not manifestly unconditional -- be conservative. */ + go_fast = False; + } + } + + if (go_fast) + for (UInt i = 0; i < n_args; i++) { + if (mightRequireFixedRegs(args[i])) { + go_fast = False; + break; + } + } + + /* At this point the scheme to use has been established. Generate code to get + the arg values into the argument regs. If we run out of arg regs, give up. 
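The register-assignment policy described above can be summarised by a small standalone sketch: integer arguments consume x10/a0 .. x17/a7, floating-point arguments consume f10/fa0 .. f17/fa7, and the call is rejected once either class is exhausted (cf. nextArgReg/nextFArgReg below). This is an illustrative model only; the enum and function names are invented for the example and are not part of the patch.

#include <assert.h>

enum ArgClass { ARG_INT, ARG_FP };

/* Returns 1 if all arguments fit into the 8 + 8 parameter registers,
   0 if doHelperCall() would have to give up. */
static int fits_in_arg_regs(const enum ArgClass* args, int n_args)
{
   int next_int = 0, next_fp = 0;
   for (int i = 0; i < n_args; i++) {
      if (args[i] == ARG_INT) {
         if (next_int >= 8) return 0;   /* out of a0..a7   */
         next_int++;
      } else {
         if (next_fp >= 8) return 0;    /* out of fa0..fa7 */
         next_fp++;
      }
   }
   return 1;
}

int main(void)
{
   enum ArgClass few[4] = { ARG_INT, ARG_FP, ARG_INT, ARG_FP };
   assert(fits_in_arg_regs(few, 4) == 1);

   enum ArgClass many[9] = { ARG_INT, ARG_INT, ARG_INT, ARG_INT, ARG_INT,
                             ARG_INT, ARG_INT, ARG_INT, ARG_INT };
   assert(fits_in_arg_regs(many, 9) == 0);  /* ninth integer arg overflows */
   return 0;
}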
+ */ + + HReg argregs[RISCV64_N_ARGREGS]; + HReg fargregs[RISCV64_N_FARGREGS]; + + vassert(RISCV64_N_ARGREGS == 8); + vassert(RISCV64_N_FARGREGS == 8); + + argregs[0] = hregRISCV64_x10(); + argregs[1] = hregRISCV64_x11(); + argregs[2] = hregRISCV64_x12(); + argregs[3] = hregRISCV64_x13(); + argregs[4] = hregRISCV64_x14(); + argregs[5] = hregRISCV64_x15(); + argregs[6] = hregRISCV64_x16(); + argregs[7] = hregRISCV64_x17(); + + fargregs[0] = hregRISCV64_f10(); + fargregs[1] = hregRISCV64_f11(); + fargregs[2] = hregRISCV64_f12(); + fargregs[3] = hregRISCV64_f13(); + fargregs[4] = hregRISCV64_f14(); + fargregs[5] = hregRISCV64_f15(); + fargregs[6] = hregRISCV64_f16(); + fargregs[7] = hregRISCV64_f17(); + + HReg tmpregs[RISCV64_N_ARGREGS]; + HReg ftmpregs[RISCV64_N_FARGREGS]; + Int nextArgReg = 0, nextFArgReg = 0; + HReg cond; + + if (go_fast) { + /* FAST SCHEME */ + for (UInt i = 0; i < n_args; i++) { + IRExpr* arg = args[i]; + + IRType aTy = Ity_INVALID; + if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) + aTy = typeOfIRExpr(env->type_env, args[i]); + + if (aTy == Ity_I32 || aTy == Ity_I64) { + if (nextArgReg >= RISCV64_N_ARGREGS) + return False; /* Out of argregs. */ + addInstr(env, RISCV64Instr_MV(argregs[nextArgReg], + iselIntExpr_R(env, args[i]))); + nextArgReg++; + } else if (aTy == Ity_F32 || aTy == Ity_F64) { + if (nextFArgReg >= RISCV64_N_FARGREGS) + return False; /* Out of fargregs. */ + addInstr(env, + RISCV64Instr_FpMove(RISCV64op_FMV_D, fargregs[nextFArgReg], + iselFltExpr(env, args[i]))); + nextFArgReg++; + } else if (arg->tag == Iex_GSPTR) { + if (nextArgReg >= RISCV64_N_ARGREGS) + return False; /* Out of argregs. */ + addInstr(env, + RISCV64Instr_MV(argregs[nextArgReg], hregRISCV64_x8())); + nextArgReg++; + } else if (arg->tag == Iex_VECRET) { + /* Because of the go_fast logic above, we can't get here, since + vector return values make us use the slow path instead. */ + vassert(0); + } else + return False; /* Unhandled arg type. */ + } + + /* Fast scheme only applies for unconditional calls. Hence: */ + cond = INVALID_HREG; + + } else { + /* SLOW SCHEME; move via temporaries. */ + for (UInt i = 0; i < n_args; i++) { + IRExpr* arg = args[i]; + + IRType aTy = Ity_INVALID; + if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) + aTy = typeOfIRExpr(env->type_env, args[i]); + + if (aTy == Ity_I32 || aTy == Ity_I64) { + if (nextArgReg >= RISCV64_N_ARGREGS) + return False; /* Out of argregs. */ + tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]); + nextArgReg++; + } else if (aTy == Ity_F32 || aTy == Ity_F64) { + if (nextFArgReg >= RISCV64_N_FARGREGS) + return False; /* Out of fargregs. */ + ftmpregs[nextFArgReg] = iselFltExpr(env, args[i]); + nextFArgReg++; + } else if (arg->tag == Iex_GSPTR) { + if (nextArgReg >= RISCV64_N_ARGREGS) + return False; /* Out of argregs. */ + tmpregs[nextArgReg] = hregRISCV64_x8(); + nextArgReg++; + } else if (arg->tag == Iex_VECRET) { + vassert(!hregIsInvalid(r_vecRetAddr)); + tmpregs[nextArgReg] = r_vecRetAddr; + nextArgReg++; + } else + return False; /* Unhandled arg type. */ + } + + /* Compute the condition. Be a bit clever to handle the common case where + the guard is 1:Bit. */ + cond = INVALID_HREG; + if (guard) { + if (guard->tag == Iex_Const && guard->Iex.Const.con->tag == Ico_U1 && + guard->Iex.Const.con->Ico.U1 == True) { + /* Unconditional -- do nothing. */ + } else { + cond = iselIntExpr_R(env, guard); + } + } + + /* Move the args to their final destinations. 
*/ + for (UInt i = 0; i < nextArgReg; i++) { + vassert(!(hregIsInvalid(tmpregs[i]))); + addInstr(env, RISCV64Instr_MV(argregs[i], tmpregs[i])); + } + for (UInt i = 0; i < nextFArgReg; i++) { + vassert(!(hregIsInvalid(ftmpregs[i]))); + addInstr(env, RISCV64Instr_FpMove(RISCV64op_FMV_D, fargregs[i], + ftmpregs[i])); + } + } + + /* Should be assured by checks above. */ + vassert(nextArgReg <= RISCV64_N_ARGREGS); + vassert(nextFArgReg <= RISCV64_N_FARGREGS); + + /* Do final checks, set the return values, and generate the call instruction + proper. */ + vassert(nGSPTRs == 0 || nGSPTRs == 1); + vassert(nVECRETs == ((retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0)); + vassert(*stackAdjustAfterCall == 0); + vassert(is_RetLoc_INVALID(*retloc)); + switch (retTy) { + case Ity_INVALID: + /* Function doesn't return a value. */ + *retloc = mk_RetLoc_simple(RLPri_None); + break; + case Ity_I8: + case Ity_I16: + case Ity_I32: + case Ity_I64: + *retloc = mk_RetLoc_simple(RLPri_Int); + break; + case Ity_V128: + *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0); + *stackAdjustAfterCall = 16; + break; + case Ity_V256: + *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0); + *stackAdjustAfterCall = 32; + break; + default: + /* IR can denote other possible return types, but we don't handle those + here. */ + return False; + } + + /* Finally, generate the call itself. This needs the *retloc value set in the + switch above, which is why it's at the end. */ + + /* nextArgReg doles out argument registers. Since these are assigned in the + order x10/a0 .. x17/a7, its numeric value at this point, which must be + between 0 and 8 inclusive, is going to be equal to the number of arg regs + in use for the call. Hence bake that number into the call (we'll need to + know it when doing register allocation, to know what regs the call reads.) + + The same applies to nextFArgReg which records a number of used + floating-point registers f10/fa0 .. f17/fa7. + */ + addInstr(env, RISCV64Instr_Call(*retloc, (Addr64)cee->addr, cond, nextArgReg, + nextFArgReg)); + + return True; +} + +/*------------------------------------------------------------*/ +/*--- ISEL: Integer expressions (64/32/16/8/1 bit) ---*/ +/*------------------------------------------------------------*/ + +/* Select insns for an integer-typed expression, and add them to the code list. + Return a reg holding the result. This reg will be a virtual register. THE + RETURNED REG MUST NOT BE MODIFIED. If you want to modify it, ask for a new + vreg, copy it in there, and modify the copy. The register allocator will do + its best to map both vregs to the same real register, so the copies will + often disappear later in the game. + + This should handle expressions of 64, 32, 16, 8 and 1-bit type. All results + are returned in a 64-bit register. For an N-bit expression, the upper 64-N + bits are arbitrary, so you should mask or sign-extend partial values if + necessary. + + The riscv64 backend however internally always extends the values as follows: + * a 32/16/8-bit integer result is sign-extended to 64 bits, + * a 1-bit logical result is zero-extended to 64 bits. + + This schema follows the approach taken by the RV64 ISA which by default + sign-extends any 32/16/8-bit operation result to 64 bits. Matching the isel + with the ISA generally results in requiring less instructions. For instance, + it allows that any Ico_U32 immediate can be always materialized at maximum + using two instructions (LUI+ADDIW). 
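The "at most two instructions" claim above can be checked with a standalone sketch of what a LUI/ADDIW pair produces: LUI loads a 20-bit upper immediate sign-extended to 64 bits, and ADDIW adds a signed 12-bit immediate and sign-extends the 32-bit sum. The helper names below are invented for the example; this is illustrative only, not part of the patch.

#include <assert.h>
#include <stdint.h>

/* Sign-extend the low |bits| bits of |x| to 64 bits. */
static int64_t sx(uint64_t x, unsigned bits)
{
   uint64_t m = 1ull << (bits - 1);
   return (int64_t)(((x & ((m << 1) - 1)) ^ m) - m);
}

/* Value produced by "lui rd, hi20; addiw rd, rd, lo12" for a 32-bit imm. */
static int64_t materialize_u32(uint32_t imm)
{
   int64_t  lo12 = sx(imm, 12);            /* signed low 12 bits           */
   uint32_t hi   = imm - (uint32_t)lo12;   /* upper part, LUI operand << 12 */
   int64_t  lui  = sx(hi, 32);             /* LUI sign-extends to 64 bits  */
   return sx((uint64_t)(lui + lo12), 32);  /* ADDIW adds, sign-extends     */
}

int main(void)
{
   assert(materialize_u32(0x7FFFFFFFu) == 0x7FFFFFFFll);
   assert(materialize_u32(0x80000000u) == (int64_t)0xFFFFFFFF80000000ull);
   assert(materialize_u32(0x00000FFFu) == 0xFFFll);
   return 0;
}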
+ + An important consequence of this design is that any Iop_Sto64 extension is + a no-op. On the other hand, any Iop_64to operation must additionally + perform an N-bit sign-extension. This is the opposite situation than in most + other VEX backends. +*/ + +/* -------------------------- Reg --------------------------- */ + +/* DO NOT CALL THIS DIRECTLY ! */ +static HReg iselIntExpr_R_wrk(ISelEnv* env, IRExpr* e) +{ + IRType ty = typeOfIRExpr(env->type_env, e); + vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8 || + ty == Ity_I1); + + switch (e->tag) { + /* ------------------------ TEMP ------------------------- */ + case Iex_RdTmp: { + return lookupIRTemp(env, e->Iex.RdTmp.tmp); + } + + /* ------------------------ LOAD ------------------------- */ + case Iex_Load: { + if (e->Iex.Load.end != Iend_LE) + goto irreducible; + + HReg dst = newVRegI(env); + /* TODO Optimize the cases with small imm Add64/Sub64. */ + HReg addr = iselIntExpr_R(env, e->Iex.Load.addr); + + if (ty == Ity_I64) + addInstr(env, RISCV64Instr_Load(RISCV64op_LD, dst, addr, 0)); + else if (ty == Ity_I32) + addInstr(env, RISCV64Instr_Load(RISCV64op_LW, dst, addr, 0)); + else if (ty == Ity_I16) + addInstr(env, RISCV64Instr_Load(RISCV64op_LH, dst, addr, 0)); + else if (ty == Ity_I8) + addInstr(env, RISCV64Instr_Load(RISCV64op_LB, dst, addr, 0)); + else + goto irreducible; + return dst; + } + + /* ---------------------- BINARY OP ---------------------- */ + case Iex_Binop: { + /* TODO Optimize for small imms by generating i. */ + switch (e->Iex.Binop.op) { + case Iop_Add64: + case Iop_Add32: + case Iop_Sub64: + case Iop_Sub32: + case Iop_Xor64: + case Iop_Xor32: + case Iop_Or64: + case Iop_Or32: + case Iop_Or1: + case Iop_And64: + case Iop_And32: + case Iop_And1: + case Iop_Shl64: + case Iop_Shl32: + case Iop_Shr64: + case Iop_Shr32: + case Iop_Sar64: + case Iop_Sar32: + case Iop_Mul64: + case Iop_Mul32: + case Iop_DivU64: + case Iop_DivU32: + case Iop_DivS64: + case Iop_DivS32: { + RISCV64ALUOp op; + switch (e->Iex.Binop.op) { + case Iop_Add64: + op = RISCV64op_ADD; + break; + case Iop_Add32: + op = RISCV64op_ADDW; + break; + case Iop_Sub64: + op = RISCV64op_SUB; + break; + case Iop_Sub32: + op = RISCV64op_SUBW; + break; + case Iop_Xor64: + case Iop_Xor32: + op = RISCV64op_XOR; + break; + case Iop_Or64: + case Iop_Or32: + case Iop_Or1: + op = RISCV64op_OR; + break; + case Iop_And64: + case Iop_And32: + case Iop_And1: + op = RISCV64op_AND; + break; + case Iop_Shl64: + op = RISCV64op_SLL; + break; + case Iop_Shl32: + op = RISCV64op_SLLW; + break; + case Iop_Shr64: + op = RISCV64op_SRL; + break; + case Iop_Shr32: + op = RISCV64op_SRLW; + break; + case Iop_Sar64: + op = RISCV64op_SRA; + break; + case Iop_Sar32: + op = RISCV64op_SRAW; + break; + case Iop_Mul64: + op = RISCV64op_MUL; + break; + case Iop_Mul32: + op = RISCV64op_MULW; + break; + case Iop_DivU64: + op = RISCV64op_DIVU; + break; + case Iop_DivU32: + op = RISCV64op_DIVUW; + break; + case Iop_DivS64: + op = RISCV64op_DIV; + break; + case Iop_DivS32: + op = RISCV64op_DIVW; + break; + default: + vassert(0); + } + HReg dst = newVRegI(env); + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); + addInstr(env, RISCV64Instr_ALU(op, dst, argL, argR)); + return dst; + } + case Iop_CmpEQ64: + case Iop_CmpEQ32: + case Iop_CasCmpEQ64: + case Iop_CasCmpEQ32: { + HReg tmp = newVRegI(env); + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); + addInstr(env, 
RISCV64Instr_ALU(RISCV64op_SUB, tmp, argL, argR)); + HReg dst = newVRegI(env); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLTIU, dst, tmp, 1)); + return dst; + } + case Iop_CmpNE64: + case Iop_CmpNE32: + case Iop_CasCmpNE64: + case Iop_CasCmpNE32: { + HReg tmp = newVRegI(env); + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); + addInstr(env, RISCV64Instr_ALU(RISCV64op_SUB, tmp, argL, argR)); + HReg dst = newVRegI(env); + addInstr(env, + RISCV64Instr_ALU(RISCV64op_SLTU, dst, hregRISCV64_x0(), tmp)); + return dst; + } + case Iop_CmpLT64S: + case Iop_CmpLT32S: { + HReg dst = newVRegI(env); + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); + addInstr(env, RISCV64Instr_ALU(RISCV64op_SLT, dst, argL, argR)); + return dst; + } + case Iop_CmpLE64S: + case Iop_CmpLE32S: { + HReg tmp = newVRegI(env); + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); + addInstr(env, RISCV64Instr_ALU(RISCV64op_SLT, tmp, argR, argL)); + HReg dst = newVRegI(env); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLTIU, dst, tmp, 1)); + return dst; + } + case Iop_CmpLT64U: + case Iop_CmpLT32U: { + HReg dst = newVRegI(env); + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); + addInstr(env, RISCV64Instr_ALU(RISCV64op_SLTU, dst, argL, argR)); + return dst; + } + case Iop_CmpLE64U: + case Iop_CmpLE32U: { + HReg tmp = newVRegI(env); + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); + addInstr(env, RISCV64Instr_ALU(RISCV64op_SLTU, tmp, argR, argL)); + HReg dst = newVRegI(env); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLTIU, dst, tmp, 1)); + return dst; + } + case Iop_Max32U: { + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); + HReg cond = newVRegI(env); + addInstr(env, RISCV64Instr_ALU(RISCV64op_SLTU, cond, argL, argR)); + HReg dst = newVRegI(env); + addInstr(env, RISCV64Instr_CSEL(dst, argR, argL, cond)); + return dst; + } + case Iop_32HLto64: { + HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2); + + HReg lo32_tmp = newVRegI(env); + addInstr(env, + RISCV64Instr_ALUImm(RISCV64op_SLLI, lo32_tmp, lo32s, 32)); + HReg lo32 = newVRegI(env); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SRLI, lo32, lo32_tmp, 32)); + + HReg hi32 = newVRegI(env); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLLI, hi32, hi32s, 32)); + + HReg dst = newVRegI(env); + addInstr(env, RISCV64Instr_ALU(RISCV64op_OR, dst, hi32, lo32)); + return dst; + } + case Iop_DivModS32to32: { + /* TODO Improve in conjunction with Iop_64HIto32. 
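For reference, the result layout built just below for Iop_DivModS32to32, remainder in the high 32 bits and quotient in the low 32 bits, both truncated toward zero as REMW/DIVW do, can be modelled by this standalone sketch (illustrative only; names invented for the example):

#include <assert.h>
#include <stdint.h>

static uint64_t divmod_s32to32(int32_t a, int32_t b)
{
   uint32_t rem = (uint32_t)(a % b);    /* REMW */
   uint32_t quo = (uint32_t)(a / b);    /* DIVW */
   return ((uint64_t)rem << 32) | quo;  /* shift/OR packing done below */
}

int main(void)
{
   assert(divmod_s32to32(7, 2)  == 0x0000000100000003ull); /* rem 1, quo 3   */
   assert(divmod_s32to32(-7, 2) == 0xFFFFFFFFFFFFFFFDull); /* rem -1, quo -3 */
   return 0;
}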
*/ + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); + + HReg remw = newVRegI(env); + addInstr(env, RISCV64Instr_ALU(RISCV64op_REMW, remw, argL, argR)); + HReg remw_hi = newVRegI(env); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLLI, remw_hi, remw, 32)); + + HReg divw = newVRegI(env); + addInstr(env, RISCV64Instr_ALU(RISCV64op_DIVW, divw, argL, argR)); + HReg divw_hi = newVRegI(env); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLLI, divw_hi, divw, 32)); + HReg divw_lo = newVRegI(env); + addInstr(env, + RISCV64Instr_ALUImm(RISCV64op_SRLI, divw_lo, divw_hi, 32)); + + HReg dst = newVRegI(env); + addInstr(env, RISCV64Instr_ALU(RISCV64op_OR, dst, remw_hi, divw_lo)); + return dst; + } + case Iop_DivModU32to32: { + /* TODO Improve in conjunction with Iop_64HIto32. */ + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); + + HReg remuw = newVRegI(env); + addInstr(env, RISCV64Instr_ALU(RISCV64op_REMUW, remuw, argL, argR)); + HReg remuw_hi = newVRegI(env); + addInstr(env, + RISCV64Instr_ALUImm(RISCV64op_SLLI, remuw_hi, remuw, 32)); + + HReg divuw = newVRegI(env); + addInstr(env, RISCV64Instr_ALU(RISCV64op_DIVUW, divuw, argL, argR)); + HReg divuw_hi = newVRegI(env); + addInstr(env, + RISCV64Instr_ALUImm(RISCV64op_SLLI, divuw_hi, divuw, 32)); + HReg divuw_lo = newVRegI(env); + addInstr(env, + RISCV64Instr_ALUImm(RISCV64op_SRLI, divuw_lo, divuw_hi, 32)); + + HReg dst = newVRegI(env); + addInstr(env, RISCV64Instr_ALU(RISCV64op_OR, dst, remuw_hi, divuw_lo)); + return dst; + } + case Iop_F32toI32S: + case Iop_F32toI32U: + case Iop_F32toI64S: + case Iop_F32toI64U: { + RISCV64FpConvertOp op; + switch (e->Iex.Binop.op) { + case Iop_F32toI32S: + op = RISCV64op_FCVT_W_S; + break; + case Iop_F32toI32U: + op = RISCV64op_FCVT_WU_S; + break; + case Iop_F32toI64S: + op = RISCV64op_FCVT_L_S; + break; + case Iop_F32toI64U: + op = RISCV64op_FCVT_LU_S; + break; + default: + vassert(0); + } + HReg dst = newVRegI(env); + HReg src = iselFltExpr(env, e->Iex.Binop.arg2); + set_fcsr_rounding_mode(env, e->Iex.Binop.arg1); + addInstr(env, RISCV64Instr_FpConvert(op, dst, src)); + return dst; + } + case Iop_CmpF32: + case Iop_CmpF64: { + HReg argL = iselFltExpr(env, e->Iex.Binop.arg1); + HReg argR = iselFltExpr(env, e->Iex.Binop.arg2); + + HReg lt = newVRegI(env); + HReg gt = newVRegI(env); + HReg eq = newVRegI(env); + if (e->Iex.Binop.op == Iop_CmpF32) { + addInstr(env, + RISCV64Instr_FpCompare(RISCV64op_FLT_S, lt, argL, argR)); + addInstr(env, + RISCV64Instr_FpCompare(RISCV64op_FLT_S, gt, argR, argL)); + addInstr(env, + RISCV64Instr_FpCompare(RISCV64op_FEQ_S, eq, argL, argR)); + } else { + addInstr(env, + RISCV64Instr_FpCompare(RISCV64op_FLT_D, lt, argL, argR)); + addInstr(env, + RISCV64Instr_FpCompare(RISCV64op_FLT_D, gt, argR, argL)); + addInstr(env, + RISCV64Instr_FpCompare(RISCV64op_FEQ_D, eq, argL, argR)); + } + + /* + t0 = Ircr_UN + t1 = Ircr_LT + t2 = csel t1, t0, lt + t3 = Ircr_GT + t4 = csel t3, t2, gt + t5 = Ircr_EQ + dst = csel t5, t4, eq + */ + HReg t0 = newVRegI(env); + addInstr(env, RISCV64Instr_LI(t0, Ircr_UN)); + HReg t1 = newVRegI(env); + addInstr(env, RISCV64Instr_LI(t1, Ircr_LT)); + HReg t2 = newVRegI(env); + addInstr(env, RISCV64Instr_CSEL(t2, t1, t0, lt)); + HReg t3 = newVRegI(env); + addInstr(env, RISCV64Instr_LI(t3, Ircr_GT)); + HReg t4 = newVRegI(env); + addInstr(env, RISCV64Instr_CSEL(t4, t3, t2, gt)); + HReg t5 = newVRegI(env); + addInstr(env, RISCV64Instr_LI(t5, Ircr_EQ)); + HReg dst = 
newVRegI(env); + addInstr(env, RISCV64Instr_CSEL(dst, t5, t4, eq)); + return dst; + } + case Iop_F64toI32S: + case Iop_F64toI32U: + case Iop_F64toI64S: + case Iop_F64toI64U: { + RISCV64FpConvertOp op; + switch (e->Iex.Binop.op) { + case Iop_F64toI32S: + op = RISCV64op_FCVT_W_D; + break; + case Iop_F64toI32U: + op = RISCV64op_FCVT_WU_D; + break; + case Iop_F64toI64S: + op = RISCV64op_FCVT_L_D; + break; + case Iop_F64toI64U: + op = RISCV64op_FCVT_LU_D; + break; + default: + vassert(0); + } + HReg dst = newVRegI(env); + HReg src = iselFltExpr(env, e->Iex.Binop.arg2); + set_fcsr_rounding_mode(env, e->Iex.Binop.arg1); + addInstr(env, RISCV64Instr_FpConvert(op, dst, src)); + return dst; + } + default: + break; + } + + break; + } + + /* ---------------------- UNARY OP ----------------------- */ + case Iex_Unop: { + switch (e->Iex.Unop.op) { + case Iop_Not64: + case Iop_Not32: { + HReg dst = newVRegI(env); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_XORI, dst, src, -1)); + return dst; + } + case Iop_Not1: { + HReg dst = newVRegI(env); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLTIU, dst, src, 1)); + return dst; + } + case Iop_8Uto32: + case Iop_8Uto64: + case Iop_16Uto64: + case Iop_32Uto64: { + UInt shift = + 64 - 8 * sizeofIRType(typeOfIRExpr(env->type_env, e->Iex.Unop.arg)); + HReg tmp = newVRegI(env); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLLI, tmp, src, shift)); + HReg dst = newVRegI(env); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SRLI, dst, tmp, shift)); + return dst; + } + case Iop_1Sto32: + case Iop_1Sto64: { + HReg tmp = newVRegI(env); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLLI, tmp, src, 63)); + HReg dst = newVRegI(env); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SRAI, dst, tmp, 63)); + return dst; + } + case Iop_1Uto64: + case Iop_8Sto64: + case Iop_16Sto64: + case Iop_32Sto64: + /* These are no-ops. */ + return iselIntExpr_R(env, e->Iex.Unop.arg); + case Iop_32to8: + case Iop_32to16: + case Iop_64to8: + case Iop_64to16: + case Iop_64to32: { + UInt shift = 64 - 8 * sizeofIRType(ty); + HReg tmp = newVRegI(env); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLLI, tmp, src, shift)); + HReg dst = newVRegI(env); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SRAI, dst, tmp, shift)); + return dst; + } + case Iop_128HIto64: { + HReg rHi, rLo; + iselInt128Expr(&rHi, &rLo, env, e->Iex.Unop.arg); + return rHi; /* and abandon rLo */ + } + case Iop_64HIto32: { + HReg dst = newVRegI(env); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SRAI, dst, src, 32)); + return dst; + } + case Iop_ReinterpF32asI32: { + HReg dst = newVRegI(env); + HReg src = iselFltExpr(env, e->Iex.Unop.arg); + addInstr(env, RISCV64Instr_FpMove(RISCV64op_FMV_X_W, dst, src)); + return dst; + } + case Iop_ReinterpF64asI64: { + HReg dst = newVRegI(env); + HReg src = iselFltExpr(env, e->Iex.Unop.arg); + addInstr(env, RISCV64Instr_FpMove(RISCV64op_FMV_X_D, dst, src)); + return dst; + } + case Iop_CmpNEZ8: + case Iop_CmpNEZ32: + case Iop_CmpNEZ64: { + HReg dst = newVRegI(env); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, + RISCV64Instr_ALU(RISCV64op_SLTU, dst, hregRISCV64_x0(), src)); + return dst; + } + case Iop_CmpwNEZ32: + case Iop_CmpwNEZ64: { + /* Use the fact that x | -x == 0 iff x == 0. 
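A standalone sketch of that identity, matching the SUB/OR/SRAI sequence emitted below; it is illustrative only and assumes the usual two's-complement behaviour of signed conversions and arithmetic right shifts, as mainstream compilers provide.

#include <assert.h>
#include <stdint.h>

static uint64_t cmpw_nez64(uint64_t x)
{
   uint64_t m = x | (0 - x);             /* OR of x and its negation  */
   return (uint64_t)((int64_t)m >> 63);  /* SRAI by 63: 0 or all ones */
}

int main(void)
{
   assert(cmpw_nez64(0) == 0);
   assert(cmpw_nez64(5) == ~0ull);
   assert(cmpw_nez64(1ull << 63) == ~0ull);
   return 0;
}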
Otherwise, either X or -X + will have a 1 in the MSB. */ + HReg neg = newVRegI(env); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, + RISCV64Instr_ALU(RISCV64op_SUB, neg, hregRISCV64_x0(), src)); + HReg or = newVRegI(env); + addInstr(env, RISCV64Instr_ALU(RISCV64op_OR, or, src, neg)); + HReg dst = newVRegI(env); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SRAI, dst, or, 63)); + return dst; + } + case Iop_Left32: + case Iop_Left64: { + /* Left32/64(src) = src | -src. */ + HReg neg = newVRegI(env); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, + RISCV64Instr_ALU(RISCV64op_SUB, neg, hregRISCV64_x0(), src)); + HReg dst = newVRegI(env); + addInstr(env, RISCV64Instr_ALU(RISCV64op_OR, dst, src, neg)); + return dst; + } + default: + break; + } + + break; + } + + /* ------------------------- GET ------------------------- */ + case Iex_Get: { + HReg dst = newVRegI(env); + HReg base = get_baseblock_register(); + Int off = e->Iex.Get.offset - BASEBLOCK_OFFSET_ADJUSTMENT; + vassert(off >= -2048 && off < 2048); + + if (ty == Ity_I64) + addInstr(env, RISCV64Instr_Load(RISCV64op_LD, dst, base, off)); + else if (ty == Ity_I32) + addInstr(env, RISCV64Instr_Load(RISCV64op_LW, dst, base, off)); + else if (ty == Ity_I16) + addInstr(env, RISCV64Instr_Load(RISCV64op_LH, dst, base, off)); + else if (ty == Ity_I8) + addInstr(env, RISCV64Instr_Load(RISCV64op_LB, dst, base, off)); + else + goto irreducible; + return dst; + } + + /* ------------------------ CCALL ------------------------ */ + case Iex_CCall: { + vassert(ty == e->Iex.CCall.retty); + + /* Be very restrictive for now. Only 32 and 64-bit ints are allowed for + the return type. */ + if (e->Iex.CCall.retty != Ity_I32 && e->Iex.CCall.retty != Ity_I64) + goto irreducible; + + /* Marshal args and do the call. */ + UInt addToSp = 0; + RetLoc rloc = mk_RetLoc_INVALID(); + Bool ok = + doHelperCall(&addToSp, &rloc, env, NULL /*guard*/, e->Iex.CCall.cee, + e->Iex.CCall.retty, e->Iex.CCall.args); + if (!ok) + goto irreducible; + vassert(is_sane_RetLoc(rloc)); + vassert(rloc.pri == RLPri_Int); + vassert(addToSp == 0); + + HReg dst = newVRegI(env); + switch (e->Iex.CCall.retty) { + case Ity_I32: + /* Sign-extend the value returned from the helper as is expected by the + rest of the backend. */ + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_ADDIW, dst, + hregRISCV64_x10(), 0)); + break; + case Ity_I64: + addInstr(env, RISCV64Instr_MV(dst, hregRISCV64_x10())); + break; + default: + vassert(0); + } + return dst; + } + + /* ----------------------- LITERAL ----------------------- */ + /* 64/32/16/8-bit literals. 
*/ + case Iex_Const: { + ULong u; + HReg dst = newVRegI(env); + switch (e->Iex.Const.con->tag) { + case Ico_U64: + u = e->Iex.Const.con->Ico.U64; + break; + case Ico_U32: + vassert(ty == Ity_I32); + u = vex_sx_to_64(e->Iex.Const.con->Ico.U32, 32); + break; + case Ico_U16: + vassert(ty == Ity_I16); + u = vex_sx_to_64(e->Iex.Const.con->Ico.U16, 16); + break; + case Ico_U8: + vassert(ty == Ity_I8); + u = vex_sx_to_64(e->Iex.Const.con->Ico.U8, 8); + break; + default: + goto irreducible; + } + addInstr(env, RISCV64Instr_LI(dst, u)); + return dst; + } + + /* ---------------------- MULTIPLEX ---------------------- */ + case Iex_ITE: { + /* ITE(ccexpr, iftrue, iffalse) */ + if (ty == Ity_I64 || ty == Ity_I32) { + HReg dst = newVRegI(env); + HReg iftrue = iselIntExpr_R(env, e->Iex.ITE.iftrue); + HReg iffalse = iselIntExpr_R(env, e->Iex.ITE.iffalse); + HReg cond = iselIntExpr_R(env, e->Iex.ITE.cond); + addInstr(env, RISCV64Instr_CSEL(dst, iftrue, iffalse, cond)); + return dst; + } + break; + } + + default: + break; + } + + /* We get here if no pattern matched. */ +irreducible: + ppIRExpr(e); + vpanic("iselIntExpr_R(riscv64)"); +} + +static HReg iselIntExpr_R(ISelEnv* env, IRExpr* e) +{ + HReg r = iselIntExpr_R_wrk(env, e); + + /* Sanity checks ... */ + vassert(hregClass(r) == HRcInt64); + vassert(hregIsVirtual(r)); + + return r; +} + +/*------------------------------------------------------------*/ +/*--- ISEL: Integer expressions (128 bit) ---*/ +/*------------------------------------------------------------*/ + +/* DO NOT CALL THIS DIRECTLY ! */ +static void iselInt128Expr_wrk(HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e) +{ + vassert(typeOfIRExpr(env->type_env, e) == Ity_I128); + + /* ---------------------- BINARY OP ---------------------- */ + if (e->tag == Iex_Binop) { + switch (e->Iex.Binop.op) { + /* 64 x 64 -> 128 multiply */ + case Iop_MullS64: + case Iop_MullU64: { + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); + *rHi = newVRegI(env); + *rLo = newVRegI(env); + if (e->Iex.Binop.op == Iop_MullS64) + addInstr(env, RISCV64Instr_ALU(RISCV64op_MULH, *rHi, argL, argR)); + else + addInstr(env, RISCV64Instr_ALU(RISCV64op_MULHU, *rHi, argL, argR)); + addInstr(env, RISCV64Instr_ALU(RISCV64op_MUL, *rLo, argL, argR)); + return; + } + + /* 64 x 64 -> (64(rem),64(div)) division */ + case Iop_DivModS64to64: + case Iop_DivModU64to64: { + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); + HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); + *rHi = newVRegI(env); + *rLo = newVRegI(env); + if (e->Iex.Binop.op == Iop_DivModS64to64) { + addInstr(env, RISCV64Instr_ALU(RISCV64op_REM, *rHi, argL, argR)); + addInstr(env, RISCV64Instr_ALU(RISCV64op_DIV, *rLo, argL, argR)); + } else { + addInstr(env, RISCV64Instr_ALU(RISCV64op_REMU, *rHi, argL, argR)); + addInstr(env, RISCV64Instr_ALU(RISCV64op_DIVU, *rLo, argL, argR)); + } + return; + } + + /* 64HLto128(e1,e2) */ + case Iop_64HLto128: + *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1); + *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2); + return; + + default: + break; + } + } + + ppIRExpr(e); + vpanic("iselInt128Expr(riscv64)"); +} + +/* Compute a 128-bit value into a register pair, which is returned as the first + two parameters. As with iselIntExpr_R, these will be virtual registers and + they must not be changed by subsequent code emitted by the caller. */ +static void iselInt128Expr(HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e) +{ + iselInt128Expr_wrk(rHi, rLo, env, e); + + /* Sanity checks ... 
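As a cross-check of the Iop_MullS64 decomposition above (MULH for the high half, MUL for the low half), here is a standalone sketch using the GCC/Clang __int128 extension; it is illustrative only and not part of the patch.

#include <assert.h>
#include <stdint.h>

static void mull_s64(int64_t a, int64_t b, int64_t* hi, uint64_t* lo)
{
   __int128 p = (__int128)a * b;
   *hi = (int64_t)(p >> 64);    /* what MULH computes */
   *lo = (uint64_t)p;           /* what MUL computes  */
}

int main(void)
{
   int64_t  hi;
   uint64_t lo;
   mull_s64(-3, 5, &hi, &lo);
   assert(hi == -1 && lo == (uint64_t)-15);
   return 0;
}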
*/ + vassert(hregClass(*rHi) == HRcInt64); + vassert(hregIsVirtual(*rHi)); + vassert(hregClass(*rLo) == HRcInt64); + vassert(hregIsVirtual(*rLo)); +} + +/*------------------------------------------------------------*/ +/*--- ISEL: Floating point expressions ---*/ +/*------------------------------------------------------------*/ + +/* DO NOT CALL THIS DIRECTLY ! */ +static HReg iselFltExpr_wrk(ISelEnv* env, IRExpr* e) +{ + IRType ty = typeOfIRExpr(env->type_env, e); + vassert(ty == Ity_F32 || ty == Ity_F64); + + switch (e->tag) { + /* ------------------------ TEMP ------------------------- */ + case Iex_RdTmp: { + return lookupIRTemp(env, e->Iex.RdTmp.tmp); + } + + /* ------------------------ LOAD ------------------------- */ + case Iex_Load: { + if (e->Iex.Load.end != Iend_LE) + goto irreducible; + + HReg dst = newVRegF(env); + /* TODO Optimize the cases with small imm Add64/Sub64. */ + HReg addr = iselIntExpr_R(env, e->Iex.Load.addr); + + if (ty == Ity_F32) + addInstr(env, RISCV64Instr_FpLdSt(RISCV64op_FLW, dst, addr, 0)); + else if (ty == Ity_F64) + addInstr(env, RISCV64Instr_FpLdSt(RISCV64op_FLD, dst, addr, 0)); + else + vassert(0); + return dst; + } + + /* -------------------- QUATERNARY OP -------------------- */ + case Iex_Qop: { + switch (e->Iex.Qop.details->op) { + case Iop_MAddF32: { + HReg dst = newVRegF(env); + HReg argN = iselFltExpr(env, e->Iex.Qop.details->arg2); + HReg argM = iselFltExpr(env, e->Iex.Qop.details->arg3); + HReg argA = iselFltExpr(env, e->Iex.Qop.details->arg4); + set_fcsr_rounding_mode(env, e->Iex.Qop.details->arg1); + addInstr(env, RISCV64Instr_FpTernary(RISCV64op_FMADD_S, dst, argN, + argM, argA)); + return dst; + } + case Iop_MAddF64: { + HReg dst = newVRegF(env); + HReg argN = iselFltExpr(env, e->Iex.Qop.details->arg2); + HReg argM = iselFltExpr(env, e->Iex.Qop.details->arg3); + HReg argA = iselFltExpr(env, e->Iex.Qop.details->arg4); + set_fcsr_rounding_mode(env, e->Iex.Qop.details->arg1); + addInstr(env, RISCV64Instr_FpTernary(RISCV64op_FMADD_D, dst, argN, + argM, argA)); + return dst; + } + default: + break; + } + + break; + } + + /* --------------------- TERNARY OP ---------------------- */ + case Iex_Triop: { + RISCV64FpBinaryOp op; + switch (e->Iex.Triop.details->op) { + case Iop_AddF32: + op = RISCV64op_FADD_S; + break; + case Iop_MulF32: + op = RISCV64op_FMUL_S; + break; + case Iop_DivF32: + op = RISCV64op_FDIV_S; + break; + case Iop_AddF64: + op = RISCV64op_FADD_D; + break; + case Iop_SubF64: + op = RISCV64op_FSUB_D; + break; + case Iop_MulF64: + op = RISCV64op_FMUL_D; + break; + case Iop_DivF64: + op = RISCV64op_FDIV_D; + break; + default: + goto irreducible; + } + HReg dst = newVRegF(env); + HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); + HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); + set_fcsr_rounding_mode(env, e->Iex.Triop.details->arg1); + addInstr(env, RISCV64Instr_FpBinary(op, dst, src1, src2)); + return dst; + } + + /* ---------------------- BINARY OP ---------------------- */ + case Iex_Binop: { + switch (e->Iex.Binop.op) { + case Iop_SqrtF32: { + HReg dst = newVRegF(env); + HReg src = iselFltExpr(env, e->Iex.Binop.arg2); + set_fcsr_rounding_mode(env, e->Iex.Binop.arg1); + addInstr(env, RISCV64Instr_FpUnary(RISCV64op_FSQRT_S, dst, src)); + return dst; + } + case Iop_SqrtF64: { + HReg dst = newVRegF(env); + HReg src = iselFltExpr(env, e->Iex.Binop.arg2); + set_fcsr_rounding_mode(env, e->Iex.Binop.arg1); + addInstr(env, RISCV64Instr_FpUnary(RISCV64op_FSQRT_D, dst, src)); + return dst; + } + case Iop_I32StoF32: + case 
Iop_I32UtoF32: + case Iop_I64StoF32: + case Iop_I64UtoF32: + case Iop_I64StoF64: + case Iop_I64UtoF64: { + RISCV64FpConvertOp op; + switch (e->Iex.Binop.op) { + case Iop_I32StoF32: + op = RISCV64op_FCVT_S_W; + break; + case Iop_I32UtoF32: + op = RISCV64op_FCVT_S_WU; + break; + case Iop_I64StoF32: + op = RISCV64op_FCVT_S_L; + break; + case Iop_I64UtoF32: + op = RISCV64op_FCVT_S_LU; + break; + case Iop_I64StoF64: + op = RISCV64op_FCVT_D_L; + break; + case Iop_I64UtoF64: + op = RISCV64op_FCVT_D_LU; + break; + default: + vassert(0); + } + HReg dst = newVRegF(env); + HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2); + set_fcsr_rounding_mode(env, e->Iex.Binop.arg1); + addInstr(env, RISCV64Instr_FpConvert(op, dst, src)); + return dst; + } + case Iop_F64toF32: { + HReg dst = newVRegF(env); + HReg src = iselFltExpr(env, e->Iex.Binop.arg2); + set_fcsr_rounding_mode(env, e->Iex.Binop.arg1); + addInstr(env, RISCV64Instr_FpConvert(RISCV64op_FCVT_S_D, dst, src)); + return dst; + } + case Iop_MinNumF32: + case Iop_MaxNumF32: + case Iop_MinNumF64: + case Iop_MaxNumF64: { + RISCV64FpBinaryOp op; + switch (e->Iex.Binop.op) { + case Iop_MinNumF32: + op = RISCV64op_FMIN_S; + break; + case Iop_MaxNumF32: + op = RISCV64op_FMAX_S; + break; + case Iop_MinNumF64: + op = RISCV64op_FMIN_D; + break; + case Iop_MaxNumF64: + op = RISCV64op_FMAX_D; + break; + default: + vassert(0); + } + HReg dst = newVRegF(env); + HReg src1 = iselFltExpr(env, e->Iex.Binop.arg1); + HReg src2 = iselFltExpr(env, e->Iex.Binop.arg2); + addInstr(env, RISCV64Instr_FpBinary(op, dst, src1, src2)); + return dst; + } + default: + break; + } + + break; + } + + /* ---------------------- UNARY OP ----------------------- */ + case Iex_Unop: { + switch (e->Iex.Unop.op) { + case Iop_NegF32: + case Iop_AbsF32: + case Iop_NegF64: + case Iop_AbsF64: { + RISCV64FpBinaryOp op; + switch (e->Iex.Unop.op) { + case Iop_NegF32: + op = RISCV64op_FSGNJN_S; + break; + case Iop_AbsF32: + op = RISCV64op_FSGNJX_S; + break; + case Iop_NegF64: + op = RISCV64op_FSGNJN_D; + break; + case Iop_AbsF64: + op = RISCV64op_FSGNJX_D; + break; + default: + vassert(0); + } + HReg dst = newVRegF(env); + HReg src = iselFltExpr(env, e->Iex.Unop.arg); + addInstr(env, RISCV64Instr_FpBinary(op, dst, src, src)); + return dst; + } + case Iop_I32StoF64: { + HReg dst = newVRegF(env); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, RISCV64Instr_FpConvert(RISCV64op_FCVT_D_W, dst, src)); + return dst; + } + case Iop_I32UtoF64: { + HReg dst = newVRegF(env); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, RISCV64Instr_FpConvert(RISCV64op_FCVT_D_WU, dst, src)); + return dst; + } + case Iop_F32toF64: { + HReg dst = newVRegF(env); + HReg src = iselFltExpr(env, e->Iex.Unop.arg); + addInstr(env, RISCV64Instr_FpConvert(RISCV64op_FCVT_D_S, dst, src)); + return dst; + } + case Iop_ReinterpI32asF32: { + HReg dst = newVRegF(env); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, RISCV64Instr_FpMove(RISCV64op_FMV_W_X, dst, src)); + return dst; + } + case Iop_ReinterpI64asF64: { + HReg dst = newVRegF(env); + HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); + addInstr(env, RISCV64Instr_FpMove(RISCV64op_FMV_D_X, dst, src)); + return dst; + } + default: + break; + } + + break; + } + + /* ------------------------- GET ------------------------- */ + case Iex_Get: { + HReg dst = newVRegF(env); + HReg base = get_baseblock_register(); + Int off = e->Iex.Get.offset - BASEBLOCK_OFFSET_ADJUSTMENT; + vassert(off >= -2048 && off < 2048); + + if (ty == Ity_F32) + 
addInstr(env, RISCV64Instr_FpLdSt(RISCV64op_FLW, dst, base, off)); + else if (ty == Ity_F64) + addInstr(env, RISCV64Instr_FpLdSt(RISCV64op_FLD, dst, base, off)); + else + vassert(0); + return dst; + } + + default: + break; + } + +irreducible: + ppIRExpr(e); + vpanic("iselFltExpr(riscv64)"); +} + +/* Compute a floating-point value into a register, the identity of which is + returned. As with iselIntExpr_R, the register will be virtual and must not be + changed by subsequent code emitted by the caller. */ +static HReg iselFltExpr(ISelEnv* env, IRExpr* e) +{ + HReg r = iselFltExpr_wrk(env, e); + + /* Sanity checks ... */ + vassert(hregClass(r) == HRcFlt64); + vassert(hregIsVirtual(r)); + + return r; +} + +/*------------------------------------------------------------*/ +/*--- ISEL: Statements ---*/ +/*------------------------------------------------------------*/ + +static void iselStmt(ISelEnv* env, IRStmt* stmt) +{ + if (vex_traceflags & VEX_TRACE_VCODE) { + vex_printf("\n-- "); + ppIRStmt(stmt); + vex_printf("\n"); + } + + switch (stmt->tag) { + /* ------------------------ STORE ------------------------ */ + /* Little-endian write to memory. */ + case Ist_Store: { + IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data); + if (tyd == Ity_I64 || tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) { + HReg src = iselIntExpr_R(env, stmt->Ist.Store.data); + /* TODO Optimize the cases with small imm Add64/Sub64. */ + HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr); + + if (tyd == Ity_I64) + addInstr(env, RISCV64Instr_Store(RISCV64op_SD, src, addr, 0)); + else if (tyd == Ity_I32) + addInstr(env, RISCV64Instr_Store(RISCV64op_SW, src, addr, 0)); + else if (tyd == Ity_I16) + addInstr(env, RISCV64Instr_Store(RISCV64op_SH, src, addr, 0)); + else if (tyd == Ity_I8) + addInstr(env, RISCV64Instr_Store(RISCV64op_SB, src, addr, 0)); + else + vassert(0); + return; + } + if (tyd == Ity_F32 || tyd == Ity_F64) { + HReg src = iselFltExpr(env, stmt->Ist.Store.data); + HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr); + + if (tyd == Ity_F32) + addInstr(env, RISCV64Instr_FpLdSt(RISCV64op_FSW, src, addr, 0)); + else if (tyd == Ity_F64) + addInstr(env, RISCV64Instr_FpLdSt(RISCV64op_FSD, src, addr, 0)); + else + vassert(0); + return; + } + break; + } + + /* ------------------------- PUT ------------------------- */ + /* Write guest state, fixed offset. 
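The offset arithmetic used for Put/Get can be summarised in a standalone sketch. It assumes, as BASEBLOCK_OFFSET_ADJUSTMENT above suggests, that the base register x8 points 2048 bytes past the start of the guest state, so that any state offset in [0, 4096) fits the signed 12-bit displacement of the load/store instructions. Illustrative only; the function name is invented and the macro merely mirrors the definition in host_riscv64_defs.h.

#include <assert.h>

#define BASEBLOCK_OFFSET_ADJUSTMENT 2048

static int guest_offset_to_soff12(int offset)
{
   int soff12 = offset - BASEBLOCK_OFFSET_ADJUSTMENT;
   assert(soff12 >= -2048 && soff12 < 2048);  /* same check as in the code */
   return soff12;
}

int main(void)
{
   assert(guest_offset_to_soff12(0)    == -2048);
   assert(guest_offset_to_soff12(4095) ==  2047);
   return 0;
}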
*/ + case Ist_Put: { + IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); + if (tyd == Ity_I64 || tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) { + HReg src = iselIntExpr_R(env, stmt->Ist.Put.data); + HReg base = get_baseblock_register(); + Int off = stmt->Ist.Put.offset - BASEBLOCK_OFFSET_ADJUSTMENT; + vassert(off >= -2048 && off < 2048); + + if (tyd == Ity_I64) + addInstr(env, RISCV64Instr_Store(RISCV64op_SD, src, base, off)); + else if (tyd == Ity_I32) + addInstr(env, RISCV64Instr_Store(RISCV64op_SW, src, base, off)); + else if (tyd == Ity_I16) + addInstr(env, RISCV64Instr_Store(RISCV64op_SH, src, base, off)); + else if (tyd == Ity_I8) + addInstr(env, RISCV64Instr_Store(RISCV64op_SB, src, base, off)); + else + vassert(0); + return; + } + if (tyd == Ity_F32 || tyd == Ity_F64) { + HReg src = iselFltExpr(env, stmt->Ist.Put.data); + HReg base = get_baseblock_register(); + Int off = stmt->Ist.Put.offset - BASEBLOCK_OFFSET_ADJUSTMENT; + vassert(off >= -2048 && off < 2048); + + if (tyd == Ity_F32) + addInstr(env, RISCV64Instr_FpLdSt(RISCV64op_FSW, src, base, off)); + else if (tyd == Ity_F64) + addInstr(env, RISCV64Instr_FpLdSt(RISCV64op_FSD, src, base, off)); + else + vassert(0); + return; + } + break; + } + + /* ------------------------- TMP ------------------------- */ + /* Assign value to temporary. */ + case Ist_WrTmp: { + IRType ty = typeOfIRTemp(env->type_env, stmt->Ist.WrTmp.tmp); + if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8 || + ty == Ity_I1) { + HReg dst = lookupIRTemp(env, stmt->Ist.WrTmp.tmp); + HReg src = iselIntExpr_R(env, stmt->Ist.WrTmp.data); + addInstr(env, RISCV64Instr_MV(dst, src)); + return; + } + if (ty == Ity_F32 || ty == Ity_F64) { + HReg dst = lookupIRTemp(env, stmt->Ist.WrTmp.tmp); + HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data); + addInstr(env, RISCV64Instr_FpMove(RISCV64op_FMV_D, dst, src)); + return; + } + break; + } + + /* ---------------- Call to DIRTY helper ----------------- */ + /* Call complex ("dirty") helper function. */ + case Ist_Dirty: { + IRDirty* d = stmt->Ist.Dirty.details; + + /* Figure out the return type, if any. */ + IRType retty = Ity_INVALID; + if (d->tmp != IRTemp_INVALID) + retty = typeOfIRTemp(env->type_env, d->tmp); + + if (retty != Ity_INVALID && retty != Ity_I8 && retty != Ity_I16 && + retty != Ity_I32 && retty != Ity_I64) + goto stmt_fail; + + /* Marshal args and do the call. */ + UInt addToSp = 0; + RetLoc rloc = mk_RetLoc_INVALID(); + Bool ok = + doHelperCall(&addToSp, &rloc, env, d->guard, d->cee, retty, d->args); + if (!ok) + goto stmt_fail; + vassert(is_sane_RetLoc(rloc)); + vassert(addToSp == 0); + + /* Now figure out what to do with the returned value, if any. */ + switch (retty) { + case Ity_INVALID: { + /* No return value. Nothing to do. */ + vassert(d->tmp == IRTemp_INVALID); + vassert(rloc.pri == RLPri_None); + return; + } + /* The returned value is for Ity_I in x10/a0. Park it in the register + associated with tmp. */ + case Ity_I8: + case Ity_I16: { + vassert(rloc.pri == RLPri_Int); + /* Sign-extend the value returned from the helper as is expected by the + rest of the backend. 
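The SLLI/SRAI pair emitted just below can be modelled by a standalone sketch; it assumes two's-complement arithmetic right shift of signed values, as mainstream compilers provide. Illustrative only, not part of the patch.

#include <assert.h>
#include <stdint.h>

/* Sign-extend an 8- or 16-bit helper result (arriving in x10/a0) to 64 bits:
   shift left by 64 - N, then arithmetic shift right by the same amount. */
static int64_t sx_helper_result(int64_t a0, unsigned result_bits)
{
   unsigned shift = 64 - result_bits;                 /* 56 for I8, 48 for I16 */
   return (int64_t)((uint64_t)a0 << shift) >> shift;  /* SLLI, then SRAI       */
}

int main(void)
{
   assert(sx_helper_result(0xFF, 8)    == -1);
   assert(sx_helper_result(0x7FFF, 16) == 0x7FFF);
   return 0;
}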
*/ + HReg dst = lookupIRTemp(env, d->tmp); + UInt shift = 64 - 8 * sizeofIRType(retty); + HReg tmp = newVRegI(env); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SLLI, tmp, + hregRISCV64_x10(), shift)); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_SRAI, dst, tmp, shift)); + return; + } + case Ity_I32: { + vassert(rloc.pri == RLPri_Int); + HReg dst = lookupIRTemp(env, d->tmp); + addInstr(env, RISCV64Instr_ALUImm(RISCV64op_ADDIW, dst, + hregRISCV64_x10(), 0)); + return; + } + case Ity_I64: { + vassert(rloc.pri == RLPri_Int); + HReg dst = lookupIRTemp(env, d->tmp); + addInstr(env, RISCV64Instr_MV(dst, hregRISCV64_x10())); + return; + } + default: + vassert(0); + } + break; + } + + /* ---------- Load Linked and Store Conditional ---------- */ + case Ist_LLSC: { + if (stmt->Ist.LLSC.storedata == NULL) { + /* LL */ + IRTemp res = stmt->Ist.LLSC.result; + IRType ty = typeOfIRTemp(env->type_env, res); + if (ty == Ity_I32) { + HReg r_dst = lookupIRTemp(env, res); + HReg r_addr = iselIntExpr_R(env, stmt->Ist.LLSC.addr); + addInstr(env, RISCV64Instr_LoadR(RISCV64op_LR_W, r_dst, r_addr)); + return; + } + } else { + /* SC */ + IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata); + if (tyd == Ity_I32) { + HReg r_tmp = newVRegI(env); + HReg r_src = iselIntExpr_R(env, stmt->Ist.LLSC.storedata); + HReg r_addr = iselIntExpr_R(env, stmt->Ist.LLSC.addr); + addInstr(env, + RISCV64Instr_StoreC(RISCV64op_SC_W, r_tmp, r_src, r_addr)); + + /* Now r_tmp is non-zero if failed, 0 if success. Change to IR + conventions (0 is fail, 1 is success). */ + IRTemp res = stmt->Ist.LLSC.result; + HReg r_res = lookupIRTemp(env, res); + IRType ty = typeOfIRTemp(env->type_env, res); + vassert(ty == Ity_I1); + addInstr(env, + RISCV64Instr_ALUImm(RISCV64op_SLTIU, r_res, r_tmp, 1)); + return; + } + } + break; + } + + /* ------------------------ ACAS ------------------------- */ + case Ist_CAS: { + if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) { + /* "Normal" singleton CAS. */ + IRCAS* cas = stmt->Ist.CAS.details; + IRType tyd = typeOfIRTemp(env->type_env, cas->oldLo); + if (tyd == Ity_I64 || tyd == Ity_I32) { + HReg old = lookupIRTemp(env, cas->oldLo); + HReg addr = iselIntExpr_R(env, cas->addr); + HReg expd = iselIntExpr_R(env, cas->expdLo); + HReg data = iselIntExpr_R(env, cas->dataLo); + if (tyd == Ity_I64) + addInstr(env, RISCV64Instr_CAS(RISCV64op_CAS_D, old, addr, expd, + data)); + else + addInstr(env, RISCV64Instr_CAS(RISCV64op_CAS_W, old, addr, expd, + data)); + return; + } + } + break; + } + + /* ---------------------- MEM FENCE ---------------------- */ + case Ist_MBE: + switch (stmt->Ist.MBE.event) { + case Imbe_Fence: + addInstr(env, RISCV64Instr_FENCE()); + return; + default: + break; + } + break; + + /* --------------------- INSTR MARK ---------------------- */ + /* Doesn't generate any executable code ... */ + case Ist_IMark: + return; + + /* ------------------------ NO-OP ------------------------ */ + case Ist_NoOp: + return; + + /* ------------------------ EXIT ------------------------- */ + case Ist_Exit: { + if (stmt->Ist.Exit.dst->tag != Ico_U64) + vpanic("iselStmt(riscv64): Ist_Exit: dst is not a 64-bit value"); + + HReg cond = iselIntExpr_R(env, stmt->Ist.Exit.guard); + HReg base = get_baseblock_register(); + Int soff12 = stmt->Ist.Exit.offsIP - BASEBLOCK_OFFSET_ADJUSTMENT; + vassert(soff12 >= -2048 && soff12 < 2048); + + /* Case: boring transfer to known address. */ + if (stmt->Ist.Exit.jk == Ijk_Boring) { + if (env->chainingAllowed) { + /* .. almost always true .. 
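The chaining decision just below hinges on a simple forwards-edge test: a destination beyond the highest guest address of this block is definitely not a backwards jump into it, so the event check at the destination can be skipped. A minimal standalone sketch, with names invented for the example:

#include <assert.h>
#include <stdint.h>

static int choose_fast_entry(uint64_t dst_ga, uint64_t max_ga)
{
   return dst_ga > max_ga;   /* Bool toFastEP in the code below */
}

int main(void)
{
   assert(choose_fast_entry(0x401000, 0x400fff) == 1);  /* forwards edge      */
   assert(choose_fast_entry(0x400f00, 0x400fff) == 0);  /* could be backwards */
   return 0;
}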
*/ + /* Skip the event check at the dst if this is a forwards edge. */ + Bool toFastEP = (Addr64)stmt->Ist.Exit.dst->Ico.U64 > env->max_ga; + if (0) + vex_printf("%s", toFastEP ? "Y" : ","); + addInstr(env, RISCV64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64, + base, soff12, cond, toFastEP)); + } else { + /* .. very occasionally .. */ + /* We can't use chaining, so ask for an assisted transfer, as + that's the only alternative that is allowable. */ + HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); + addInstr(env, + RISCV64Instr_XAssisted(r, base, soff12, cond, Ijk_Boring)); + } + return; + } + + /* Case: assisted transfer to arbitrary address. */ + switch (stmt->Ist.Exit.jk) { + /* Keep this list in sync with that for iselNext below. */ + case Ijk_ClientReq: + case Ijk_NoDecode: + case Ijk_NoRedir: + case Ijk_Sys_syscall: + case Ijk_InvalICache: + case Ijk_SigTRAP: { + HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); + addInstr(env, RISCV64Instr_XAssisted(r, base, soff12, cond, + stmt->Ist.Exit.jk)); + return; + } + default: + break; + } + + /* Do we ever expect to see any other kind? */ + goto stmt_fail; + } + + default: + break; + } + +stmt_fail: + ppIRStmt(stmt); + vpanic("iselStmt"); +} + +/*------------------------------------------------------------*/ +/*--- ISEL: Basic block terminators (Nexts) ---*/ +/*------------------------------------------------------------*/ + +static void iselNext(ISelEnv* env, IRExpr* next, IRJumpKind jk, Int offsIP) +{ + if (vex_traceflags & VEX_TRACE_VCODE) { + vex_printf("\n-- PUT(%d) = ", offsIP); + ppIRExpr(next); + vex_printf("; exit-"); + ppIRJumpKind(jk); + vex_printf("\n"); + } + + HReg base = get_baseblock_register(); + Int soff12 = offsIP - BASEBLOCK_OFFSET_ADJUSTMENT; + vassert(soff12 >= -2048 && soff12 < 2048); + + /* Case: boring transfer to known address. */ + if (next->tag == Iex_Const) { + IRConst* cdst = next->Iex.Const.con; + vassert(cdst->tag == Ico_U64); + if (jk == Ijk_Boring || jk == Ijk_Call) { + /* Boring transfer to known address. */ + if (env->chainingAllowed) { + /* .. almost always true .. */ + /* Skip the event check at the dst if this is a forwards edge. */ + Bool toFastEP = (Addr64)cdst->Ico.U64 > env->max_ga; + if (0) + vex_printf("%s", toFastEP ? "X" : "."); + addInstr(env, RISCV64Instr_XDirect(cdst->Ico.U64, base, soff12, + INVALID_HREG, toFastEP)); + } else { + /* .. very occasionally .. */ + /* We can't use chaining, so ask for an assisted transfer, as that's + the only alternative that is allowable. */ + HReg r = iselIntExpr_R(env, next); + addInstr(env, RISCV64Instr_XAssisted(r, base, soff12, INVALID_HREG, + Ijk_Boring)); + } + return; + } + } + + /* Case: call/return (==boring) transfer to any address. */ + switch (jk) { + case Ijk_Boring: + case Ijk_Ret: + case Ijk_Call: { + HReg r = iselIntExpr_R(env, next); + if (env->chainingAllowed) + addInstr(env, RISCV64Instr_XIndir(r, base, soff12, INVALID_HREG)); + else + addInstr(env, RISCV64Instr_XAssisted(r, base, soff12, INVALID_HREG, + Ijk_Boring)); + return; + } + default: + break; + } + + /* Case: assisted transfer to arbitrary address. */ + switch (jk) { + /* Keep this list in sync with that for Ist_Exit above. 
*/ + case Ijk_ClientReq: + case Ijk_NoDecode: + case Ijk_NoRedir: + case Ijk_Sys_syscall: + case Ijk_InvalICache: + case Ijk_SigTRAP: { + HReg r = iselIntExpr_R(env, next); + addInstr(env, RISCV64Instr_XAssisted(r, base, soff12, INVALID_HREG, jk)); + return; + } + default: + break; + } + + vex_printf("\n-- PUT(%d) = ", offsIP); + ppIRExpr(next); + vex_printf("; exit-"); + ppIRJumpKind(jk); + vex_printf("\n"); + vassert(0); /* Are we expecting any other kind? */ +} + +/*------------------------------------------------------------*/ +/*--- Insn selector top-level ---*/ +/*------------------------------------------------------------*/ + +/* Translate an entire SB to riscv64 code. */ + +HInstrArray* iselSB_RISCV64(const IRSB* bb, + VexArch arch_host, + const VexArchInfo* archinfo_host, + const VexAbiInfo* vbi /*UNUSED*/, + Int offs_Host_EvC_Counter, + Int offs_Host_EvC_FailAddr, + Bool chainingAllowed, + Bool addProfInc, + Addr max_ga) +{ + Int i, j; + HReg hreg, hregHI; + ISelEnv* env; + + /* Do some sanity checks. */ + vassert(arch_host == VexArchRISCV64); + + /* Check that the host's endianness is as expected. */ + vassert(archinfo_host->endness == VexEndnessLE); + + /* Guard against unexpected space regressions. */ + vassert(sizeof(RISCV64Instr) <= 32); + + /* Make up an initial environment to use. */ + env = LibVEX_Alloc_inline(sizeof(ISelEnv)); + env->vreg_ctr = 0; + + /* Set up output code array. */ + env->code = newHInstrArray(); + + /* Copy BB's type env. */ + env->type_env = bb->tyenv; + + /* Make up an IRTemp -> virtual HReg mapping. This doesn't change as we go + along. */ + env->n_vregmap = bb->tyenv->types_used; + env->vregmap = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg)); + env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg)); + + /* and finally ... */ + env->chainingAllowed = chainingAllowed; + env->hwcaps = archinfo_host->hwcaps; + env->previous_rm = NULL; + env->max_ga = max_ga; + + /* For each IR temporary, allocate a suitably-kinded virtual register. */ + j = 0; + for (i = 0; i < env->n_vregmap; i++) { + hregHI = hreg = INVALID_HREG; + switch (bb->tyenv->types[i]) { + case Ity_I1: + case Ity_I8: + case Ity_I16: + case Ity_I32: + case Ity_I64: + hreg = mkHReg(True, HRcInt64, 0, j++); + break; + case Ity_I128: + hreg = mkHReg(True, HRcInt64, 0, j++); + hregHI = mkHReg(True, HRcInt64, 0, j++); + break; + case Ity_F32: + case Ity_F64: + hreg = mkHReg(True, HRcFlt64, 0, j++); + break; + default: + ppIRType(bb->tyenv->types[i]); + vpanic("iselBB(riscv64): IRTemp type"); + } + env->vregmap[i] = hreg; + env->vregmapHI[i] = hregHI; + } + env->vreg_ctr = j; + + /* The very first instruction must be an event check. */ + HReg base = get_baseblock_register(); + Int soff12_amCounter = offs_Host_EvC_Counter - BASEBLOCK_OFFSET_ADJUSTMENT; + vassert(soff12_amCounter >= -2048 && soff12_amCounter < 2048); + Int soff12_amFailAddr = offs_Host_EvC_FailAddr - BASEBLOCK_OFFSET_ADJUSTMENT; + vassert(soff12_amFailAddr >= -2048 && soff12_amFailAddr < 2048); + addInstr(env, RISCV64Instr_EvCheck(base, soff12_amCounter, base, + soff12_amFailAddr)); + + /* Possibly a block counter increment (for profiling). At this point we don't + know the address of the counter, so just pretend it is zero. It will have + to be patched later, but before this translation is used, by a call to + LibVEX_PatchProfInc(). */ + if (addProfInc) + addInstr(env, RISCV64Instr_ProfInc()); + + /* Ok, finally we can iterate over the statements. 
*/ + for (i = 0; i < bb->stmts_used; i++) + iselStmt(env, bb->stmts[i]); + + iselNext(env, bb->next, bb->jumpkind, bb->offsIP); + + /* Record the number of vregs we used. */ + env->code->n_vregs = env->vreg_ctr; + return env->code; +} + +/*--------------------------------------------------------------------*/ +/*--- end host_riscv64_isel.c ---*/ +/*--------------------------------------------------------------------*/ diff --git a/priv/main_main.c b/priv/main_main.c index 9a21a0469..a69cac66b 100644 --- a/priv/main_main.c +++ b/priv/main_main.c @@ -46,6 +46,7 @@ #include "libvex_guest_mips32.h" #include "libvex_guest_mips64.h" #include "libvex_guest_tilegx.h" +#include "libvex_guest_riscv64.h" #include "main_globals.h" #include "main_util.h" @@ -60,6 +61,7 @@ #include "host_s390_defs.h" #include "host_mips_defs.h" #include "host_tilegx_defs.h" +#include "host_riscv64_defs.h" #include "guest_generic_bb_to_IR.h" #include "guest_x86_defs.h" @@ -70,6 +72,7 @@ #include "guest_s390_defs.h" #include "guest_mips_defs.h" #include "guest_tilegx_defs.h" +#include "guest_riscv64_defs.h" #include "host_generic_simd128.h" @@ -166,6 +169,14 @@ #define TILEGXST(f) vassert(0) #endif +#if defined(VGA_riscv64) || defined(VEXMULTIARCH) +#define RISCV64FN(f) f +#define RISCV64ST(f) f +#else +#define RISCV64FN(f) NULL +#define RISCV64ST(f) vassert(0) +#endif + /* This file contains the top level interface to the library. */ @@ -571,6 +582,23 @@ IRSB *LibVEX_Lift ( VexTranslateArgs *vta, vassert(sizeof( ((VexGuestTILEGXState*)0)->guest_NRADDR ) == 8); break; + case VexArchRISCV64: + preciseMemExnsFn + = RISCV64FN(guest_riscv64_state_requires_precise_mem_exns); + disInstrFn = RISCV64FN(disInstr_RISCV64); + specHelper = RISCV64FN(guest_riscv64_spechelper); + guest_layout = RISCV64FN(&riscv64guest_layout); + offB_CMSTART = offsetof(VexGuestRISCV64State,guest_CMSTART); + offB_CMLEN = offsetof(VexGuestRISCV64State,guest_CMLEN); + offB_GUEST_IP = offsetof(VexGuestRISCV64State,guest_pc); + szB_GUEST_IP = sizeof( ((VexGuestRISCV64State*)0)->guest_pc ); + vassert(vta->archinfo_guest.endness == VexEndnessLE); + vassert(0 == sizeof(VexGuestRISCV64State) % LibVEX_GUEST_STATE_ALIGN); + vassert(sizeof( ((VexGuestRISCV64State*)0)->guest_CMSTART ) == 8); + vassert(sizeof( ((VexGuestRISCV64State*)0)->guest_CMLEN ) == 8); + vassert(sizeof( ((VexGuestRISCV64State*)0)->guest_NRADDR ) == 8); + break; + default: vpanic("LibVEX_Translate: unsupported guest insn set"); } @@ -901,6 +929,14 @@ void LibVEX_Codegen ( VexTranslateArgs *vta, offB_HOST_EvC_FAILADDR = offsetof(VexGuestTILEGXState,host_EvC_FAILADDR); break; + case VexArchRISCV64: + preciseMemExnsFn + = RISCV64FN(guest_riscv64_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestRISCV64State); + offB_HOST_EvC_COUNTER = offsetof(VexGuestRISCV64State,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestRISCV64State,host_EvC_FAILADDR); + break; + default: vpanic("LibVEX_Codegen: unsupported guest insn set"); } @@ -1072,6 +1108,22 @@ void LibVEX_Codegen ( VexTranslateArgs *vta, vassert(vta->archinfo_host.endness == VexEndnessLE); break; + case VexArchRISCV64: + mode64 = True; + rRegUniv = RISCV64FN(getRRegUniverse_RISCV64()); + getRegUsage + = CAST_AS(getRegUsage) RISCV64FN(getRegUsage_RISCV64Instr); + mapRegs = CAST_AS(mapRegs) RISCV64FN(mapRegs_RISCV64Instr); + genSpill = CAST_AS(genSpill) RISCV64FN(genSpill_RISCV64); + genReload = CAST_AS(genReload) RISCV64FN(genReload_RISCV64); + genMove = CAST_AS(genMove) RISCV64FN(genMove_RISCV64); + ppInstr = 
CAST_AS(ppInstr) RISCV64FN(ppRISCV64Instr); + ppReg = CAST_AS(ppReg) RISCV64FN(ppHRegRISCV64); + iselSB = RISCV64FN(iselSB_RISCV64); + emit = CAST_AS(emit) RISCV64FN(emit_RISCV64Instr); + vassert(vta->archinfo_host.endness == VexEndnessLE); + break; + default: vpanic("LibVEX_Translate: unsupported host insn set"); } @@ -1305,6 +1357,12 @@ VexInvalRange LibVEX_Chain ( VexArch arch_host, place_to_chain, disp_cp_chain_me_EXPECTED, place_to_jump_to, True/*!mode64*/)); + + case VexArchRISCV64: + RISCV64ST(return chainXDirect_RISCV64(endness_host, + place_to_chain, + disp_cp_chain_me_EXPECTED, + place_to_jump_to)); default: vassert(0); } @@ -1369,6 +1427,11 @@ VexInvalRange LibVEX_UnChain ( VexArch arch_host, place_to_jump_to_EXPECTED, disp_cp_chain_me, True/*!mode64*/)); + case VexArchRISCV64: + RISCV64ST(return unchainXDirect_RISCV64(endness_host, + place_to_unchain, + place_to_jump_to_EXPECTED, + disp_cp_chain_me)); default: vassert(0); } @@ -1399,6 +1462,8 @@ Int LibVEX_evCheckSzB ( VexArch arch_host ) MIPS64ST(cached = evCheckSzB_MIPS()); break; case VexArchTILEGX: TILEGXST(cached = evCheckSzB_TILEGX()); break; + case VexArchRISCV64: + RISCV64ST(cached = evCheckSzB_RISCV64()); break; default: vassert(0); } @@ -1443,6 +1508,9 @@ VexInvalRange LibVEX_PatchProfInc ( VexArch arch_host, TILEGXST(return patchProfInc_TILEGX(endness_host, place_to_patch, location_of_counter, True/*!mode64*/)); + case VexArchRISCV64: + RISCV64ST(return patchProfInc_RISCV64(endness_host, place_to_patch, + location_of_counter)); default: vassert(0); } @@ -1526,6 +1594,7 @@ const HChar* LibVEX_ppVexArch ( VexArch arch ) case VexArchMIPS32: return "MIPS32"; case VexArchMIPS64: return "MIPS64"; case VexArchTILEGX: return "TILEGX"; + case VexArchRISCV64: return "RISCV64"; default: return "VexArch???"; } } @@ -1593,6 +1662,7 @@ static IRType arch_word_size (VexArch arch) { case VexArchPPC64: case VexArchS390X: case VexArchTILEGX: + case VexArchRISCV64: return Ity_I64; default: @@ -1894,6 +1964,11 @@ static const HChar* show_hwcaps_tilegx ( UInt hwcaps ) return "tilegx-baseline"; } +static const HChar* show_hwcaps_riscv64 ( UInt hwcaps ) +{ + return "riscv64"; +} + #undef NUM_HWCAPS /* Thie function must not return NULL. 
*/ @@ -1901,16 +1976,17 @@ static const HChar* show_hwcaps_tilegx ( UInt hwcaps ) static const HChar* show_hwcaps ( VexArch arch, UInt hwcaps ) { switch (arch) { - case VexArchX86: return show_hwcaps_x86(hwcaps); - case VexArchAMD64: return show_hwcaps_amd64(hwcaps); - case VexArchPPC32: return show_hwcaps_ppc32(hwcaps); - case VexArchPPC64: return show_hwcaps_ppc64(hwcaps); - case VexArchARM: return show_hwcaps_arm(hwcaps); - case VexArchARM64: return show_hwcaps_arm64(hwcaps); - case VexArchS390X: return show_hwcaps_s390x(hwcaps); - case VexArchMIPS32: return show_hwcaps_mips32(hwcaps); - case VexArchMIPS64: return show_hwcaps_mips64(hwcaps); - case VexArchTILEGX: return show_hwcaps_tilegx(hwcaps); + case VexArchX86: return show_hwcaps_x86(hwcaps); + case VexArchAMD64: return show_hwcaps_amd64(hwcaps); + case VexArchPPC32: return show_hwcaps_ppc32(hwcaps); + case VexArchPPC64: return show_hwcaps_ppc64(hwcaps); + case VexArchARM: return show_hwcaps_arm(hwcaps); + case VexArchARM64: return show_hwcaps_arm64(hwcaps); + case VexArchS390X: return show_hwcaps_s390x(hwcaps); + case VexArchMIPS32: return show_hwcaps_mips32(hwcaps); + case VexArchMIPS64: return show_hwcaps_mips64(hwcaps); + case VexArchTILEGX: return show_hwcaps_tilegx(hwcaps); + case VexArchRISCV64: return show_hwcaps_riscv64(hwcaps); default: return NULL; } } @@ -2139,6 +2215,11 @@ static void check_hwcaps ( VexArch arch, UInt hwcaps ) case VexArchTILEGX: return; + case VexArchRISCV64: + if (hwcaps == 0) + return; + invalid_hwcaps(arch, hwcaps, "Cannot handle capabilities\n"); + default: vpanic("unknown architecture"); } diff --git a/priv/main_util.h b/priv/main_util.h index c3459e9da..d7b540cdb 100644 --- a/priv/main_util.h +++ b/priv/main_util.h @@ -91,6 +91,15 @@ extern Bool vex_streq ( const HChar* s1, const HChar* s2 ); extern SizeT vex_strlen ( const HChar* str ); extern void vex_bzero ( void* s, SizeT n ); +/* Math ops */ + +/* Sign extend an N-bit value up to 64 bits, by copying bit N-1 into all higher + positions. */ +static inline ULong vex_sx_to_64( ULong x, UInt n ) +{ + vassert(n > 1 && n < 64); + return (ULong)((Long)(x << (64 - n)) >> (64 - n)); +} /* Storage management: clear the area, and allocate from it. */ diff --git a/pub/libvex.h b/pub/libvex.h index 18f331b0f..dbb0404fc 100644 --- a/pub/libvex.h +++ b/pub/libvex.h @@ -61,7 +61,8 @@ typedef VexArchS390X, VexArchMIPS32, VexArchMIPS64, - VexArchTILEGX + VexArchTILEGX, + VexArchRISCV64 } VexArch; diff --git a/pub/libvex_guest_riscv64.h b/pub/libvex_guest_riscv64.h new file mode 100644 index 000000000..31264b124 --- /dev/null +++ b/pub/libvex_guest_riscv64.h @@ -0,0 +1,148 @@ + +/*--------------------------------------------------------------------*/ +/*--- begin libvex_guest_riscv64.h ---*/ +/*--------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2020-2023 Petr Pavlu + petr.pavlu@dagobah.cz + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __LIBVEX_PUB_GUEST_RISCV64_H
+#define __LIBVEX_PUB_GUEST_RISCV64_H
+
+#include "libvex_basictypes.h"
+
+/*------------------------------------------------------------*/
+/*--- Vex's representation of the riscv64 CPU state. ---*/
+/*------------------------------------------------------------*/
+
+typedef struct {
+ /* 0 */ ULong host_EvC_FAILADDR;
+ /* 8 */ UInt host_EvC_COUNTER;
+ /* 12 */ UInt pad0;
+ /* 16 */ ULong guest_x0;
+ /* 24 */ ULong guest_x1;
+ /* 32 */ ULong guest_x2;
+ /* 40 */ ULong guest_x3;
+ /* 48 */ ULong guest_x4;
+ /* 56 */ ULong guest_x5;
+ /* 64 */ ULong guest_x6;
+ /* 72 */ ULong guest_x7;
+ /* 80 */ ULong guest_x8;
+ /* 88 */ ULong guest_x9;
+ /* 96 */ ULong guest_x10;
+ /* 104 */ ULong guest_x11;
+ /* 112 */ ULong guest_x12;
+ /* 120 */ ULong guest_x13;
+ /* 128 */ ULong guest_x14;
+ /* 136 */ ULong guest_x15;
+ /* 144 */ ULong guest_x16;
+ /* 152 */ ULong guest_x17;
+ /* 160 */ ULong guest_x18;
+ /* 168 */ ULong guest_x19;
+ /* 176 */ ULong guest_x20;
+ /* 184 */ ULong guest_x21;
+ /* 192 */ ULong guest_x22;
+ /* 200 */ ULong guest_x23;
+ /* 208 */ ULong guest_x24;
+ /* 216 */ ULong guest_x25;
+ /* 224 */ ULong guest_x26;
+ /* 232 */ ULong guest_x27;
+ /* 240 */ ULong guest_x28;
+ /* 248 */ ULong guest_x29;
+ /* 256 */ ULong guest_x30;
+ /* 264 */ ULong guest_x31;
+ /* 272 */ ULong guest_pc;
+
+ /* Floating-point state. */
+ /* 280 */ ULong guest_f0;
+ /* 288 */ ULong guest_f1;
+ /* 296 */ ULong guest_f2;
+ /* 304 */ ULong guest_f3;
+ /* 312 */ ULong guest_f4;
+ /* 320 */ ULong guest_f5;
+ /* 328 */ ULong guest_f6;
+ /* 336 */ ULong guest_f7;
+ /* 344 */ ULong guest_f8;
+ /* 352 */ ULong guest_f9;
+ /* 360 */ ULong guest_f10;
+ /* 368 */ ULong guest_f11;
+ /* 376 */ ULong guest_f12;
+ /* 384 */ ULong guest_f13;
+ /* 392 */ ULong guest_f14;
+ /* 400 */ ULong guest_f15;
+ /* 408 */ ULong guest_f16;
+ /* 416 */ ULong guest_f17;
+ /* 424 */ ULong guest_f18;
+ /* 432 */ ULong guest_f19;
+ /* 440 */ ULong guest_f20;
+ /* 448 */ ULong guest_f21;
+ /* 456 */ ULong guest_f22;
+ /* 464 */ ULong guest_f23;
+ /* 472 */ ULong guest_f24;
+ /* 480 */ ULong guest_f25;
+ /* 488 */ ULong guest_f26;
+ /* 496 */ ULong guest_f27;
+ /* 504 */ ULong guest_f28;
+ /* 512 */ ULong guest_f29;
+ /* 520 */ ULong guest_f30;
+ /* 528 */ ULong guest_f31;
+ /* 536 */ UInt guest_fcsr;
+
+ /* Various pseudo-regs mandated by Vex or Valgrind. */
+ /* Emulation notes. */
+ /* 540 */ UInt guest_EMNOTE;
+
+ /* For clflush/clinval: record start and length of area. */
+ /* 544 */ ULong guest_CMSTART;
+ /* 552 */ ULong guest_CMLEN;
+
+ /* Used to record the unredirected guest address at the start of a
+ translation whose start has been redirected. By reading this
+ pseudo-register shortly afterwards, the translation can find out what the
+ corresponding no-redirection address was. Note, this is only set for
+ wrap-style redirects, not for replace-style ones. */
+ /* 560 */ ULong guest_NRADDR;
+
+ /* Fallback LL/SC support. */
+ /* 568 */ ULong guest_LLSC_SIZE; /* 0==no transaction, else 4 or 8. */
+ /* 576 */ ULong guest_LLSC_ADDR; /* Address of the transaction. */
+ /* 584 */ ULong guest_LLSC_DATA; /* Original value at ADDR, sign-extended. */
+
+ /* Padding to 16 bytes.
*/
+ /* 592 */
+} VexGuestRISCV64State;
+
+/*------------------------------------------------------------*/
+/*--- Utility functions for riscv64 guest stuff. ---*/
+/*------------------------------------------------------------*/
+
+/* ALL THE FOLLOWING ARE VISIBLE TO LIBRARY CLIENT */
+
+/* Initialise all guest riscv64 state. */
+void LibVEX_GuestRISCV64_initialise(/*OUT*/ VexGuestRISCV64State* vex_state);
+
+#endif /* ndef __LIBVEX_PUB_GUEST_RISCV64_H */
+
+/*--------------------------------------------------------------------*/
+/*--- end libvex_guest_riscv64.h ---*/
+/*--------------------------------------------------------------------*/
diff --git a/pub/libvex_ir.h b/pub/libvex_ir.h
index 676a382f8..c1038eb88 100644
--- a/pub/libvex_ir.h
+++ b/pub/libvex_ir.h
@@ -1926,7 +1926,8 @@ typedef
 Irrm_PREPARE_SHORTER = 5, // Round to prepare for shorter
 // precision
 Irrm_AWAY_FROM_ZERO = 6, // Round to away from 0
- Irrm_NEAREST_TIE_TOWARD_0 = 7 // Round to nearest, ties towards 0
+ Irrm_NEAREST_TIE_TOWARD_0 = 7, // Round to nearest, ties towards 0
+ Irrm_INVALID = 8 // Invalid mode
 } IRRoundingMode;
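Editor's note on two value conventions used by the riscv64 instruction selector above, which are easy to misread in isel form. First, a RISC-V store-conditional writes 0 to its result register on success and non-zero on failure, whereas the IR convention for Ist_LLSC results is the opposite (1 = success); the patch bridges the two with a single `SLTIU r_res, r_tmp, 1`, i.e. `r_res = (r_tmp < 1)`. Second, sub-64-bit dirty-helper results returned in x10/a0 are widened with an SLLI/SRAI pair, which performs the same arithmetic as the vex_sx_to_64 helper added to main_util.h. The standalone C sketch below is illustrative only and is not part of the patch; the function and variable names (sx_to_64, sc_result_to_ir) are invented for the example. It restates both conversions in portable C so they can be sanity-checked by compiling and running it.

   /* Illustrative sketch only -- not part of the patch. Restates, in portable
      C, two value conventions used by the riscv64 instruction selector. */
   #include <assert.h>
   #include <stdint.h>
   #include <stdio.h>

   /* Same arithmetic as the vex_sx_to_64 helper added to main_util.h:
      sign-extend an n-bit value (1 < n < 64) to 64 bits by copying bit n-1
      upwards. This is also what the SLLI/SRAI pair does for Ity_I8/Ity_I16
      helper results. */
   static uint64_t sx_to_64(uint64_t x, unsigned n)
   {
      assert(n > 1 && n < 64);
      return (uint64_t)((int64_t)(x << (64 - n)) >> (64 - n));
   }

   /* What "SLTIU r_res, r_tmp, 1" computes: r_res = (r_tmp < 1), i.e. 1
      exactly when the store-conditional reported success (r_tmp == 0), which
      is the IR convention for Ist_LLSC results. */
   static uint64_t sc_result_to_ir(uint64_t r_tmp)
   {
      return r_tmp < 1 ? 1 : 0;
   }

   int main(void)
   {
      /* 0xFFF is -1 as a 12-bit value; 0x7FF stays non-negative. */
      assert(sx_to_64(0xFFF, 12) == UINT64_MAX);
      assert(sx_to_64(0x7FF, 12) == 0x7FF);

      /* Hardware SC result: 0 = success, non-zero = failure. IR: 1 = success. */
      assert(sc_result_to_ir(0) == 1);
      assert(sc_result_to_ir(5) == 0);

      printf("riscv64 value-convention checks passed\n");
      return 0;
   }

Using SLTIU against the immediate 1 keeps the SC-result conversion branch-free and down to a single instruction, which is why the selector emits it directly instead of materialising a comparison and branch.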