diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 9cc8b84f7eb03d856900f2fb42f412d25ec5b0d5..7fd51257e0ed41b0460e584683805c2ed0f3714f 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -94,15 +94,21 @@ config LOONGARCH
 	select HAVE_DYNAMIC_FTRACE_WITH_ARGS
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select HAVE_EBPF_JIT
+	select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN
 	select HAVE_EXIT_THREAD
 	select HAVE_FAST_GUP
 	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_FUNCTION_ARG_ACCESS_API
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
 	select HAVE_GENERIC_VDSO
+	select HAVE_HW_BREAKPOINT if PERF_EVENTS
 	select HAVE_IOREMAP_PROT
 	select HAVE_IRQ_EXIT_ON_IRQ_STACK
 	select HAVE_IRQ_TIME_ACCOUNTING
+	select HAVE_KPROBES
+	select HAVE_KPROBES_ON_FTRACE
+	select HAVE_KRETPROBES
 	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_NMI
 	select HAVE_PCI
@@ -441,6 +447,24 @@ config ARCH_IOREMAP
 	  protection support. However, you can enable LoongArch DMW-based
 	  ioremap() for better performance.
 
+config ARCH_STRICT_ALIGN
+	bool "Enable -mstrict-align to prevent unaligned accesses" if EXPERT
+	default y
+	help
+	  Not all LoongArch cores support h/w unaligned access, so the
+	  -mstrict-align build parameter can be used to prevent unaligned
+	  accesses.
+
+	  CPUs with h/w unaligned access support:
+	  Loongson-2K2000/2K3000/3A5000/3C5000/3D5000.
+
+	  CPUs without h/w unaligned access support:
+	  Loongson-2K500/2K1000.
+
+	  This option is enabled by default so that the kernel is able to run
+	  on all LoongArch systems. But you can disable it manually if you
+	  want to run the kernel only on systems with h/w unaligned access
+	  support in order to optimise for performance.
+
 config KEXEC
 	bool "Kexec system call"
 	select KEXEC_CORE
@@ -454,6 +478,7 @@ config KEXEC
 
 config CRASH_DUMP
 	bool "Build kdump crash kernel"
+	select RELOCATABLE
 	help
 	  Generate crash dump after being started by kexec. This should
 	  be normally only set in special crash dump kernels which are
@@ -463,16 +488,38 @@ config CRASH_DUMP
 
 	  For more details see Documentation/admin-guide/kdump/kdump.rst
 
-config PHYSICAL_START
-	hex "Physical address where the kernel is loaded"
-	default "0x90000000a0000000"
-	depends on CRASH_DUMP
+config RELOCATABLE
+	bool "Relocatable kernel"
 	help
-	  This gives the XKPRANGE address where the kernel is loaded.
-	  If you plan to use kernel for capturing the crash dump change
-	  this value to start of the reserved region (the "X" value as
-	  specified in the "crashkernel=YM@XM" command line boot parameter
-	  passed to the panic-ed kernel).
+	  This builds the kernel as a Position Independent Executable (PIE),
+	  which retains all of the relocation metadata required to relocate
+	  the kernel binary at runtime to a virtual address different from
+	  its link address.
+
+config RANDOMIZE_BASE
+	bool "Randomize the address of the kernel (KASLR)"
+	depends on RELOCATABLE
+	help
+	  Randomizes the physical and virtual address at which the
+	  kernel image is loaded, as a security feature that
+	  deters exploit attempts relying on knowledge of the location
+	  of kernel internals.
+
+	  The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET.
+
+	  If unsure, say N.
+
+config RANDOMIZE_BASE_MAX_OFFSET
+	hex "Maximum KASLR offset" if EXPERT
+	depends on RANDOMIZE_BASE
+	range 0x0 0x10000000
+	default "0x01000000"
+	help
+	  When KASLR is active, this provides the maximum offset that will
+	  be applied to the kernel image. It should be set according to the
+	  amount of physical RAM available in the target system.
+
+	  This is limited to 256MB by the size of the low address memory
+	  region.
 
 config SECCOMP
 	bool "Enable seccomp to safely compute untrusted bytecode"
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 4402387d27551a3601c7a5ea49b6bf42f1377376..f71edf5741011e7c2d1e17d4400308b297947253 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -71,14 +71,15 @@ KBUILD_AFLAGS_MODULE		+= -Wa,-mla-global-with-abs
 KBUILD_CFLAGS_MODULE		+= -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
 endif
 
+ifeq ($(CONFIG_RELOCATABLE),y)
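+# Build vmlinux as a static PIE: no dynamic linker is recorded, and
+# -z notext allows relocations against read-only (text) sections.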
+KBUILD_CFLAGS_KERNEL		+= -fPIE
+LDFLAGS_vmlinux			+= -static -pie --no-dynamic-linker -z notext
+endif
+
 cflags-y += -ffreestanding
 cflags-y += $(call cc-option, -mno-check-zero-division)
 
-ifndef CONFIG_PHYSICAL_START
 load-y		= 0x9000000000200000
-else
-load-y		= $(CONFIG_PHYSICAL_START)
-endif
 bootvars-y	= VMLINUX_LOAD_ADDRESS=$(load-y)
 
 drivers-$(CONFIG_PCI)		+= arch/loongarch/pci/
@@ -91,10 +92,15 @@ KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y)
 # instead of .eh_frame so we don't discard them.
 KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
+ifdef CONFIG_ARCH_STRICT_ALIGN
 # Don't emit unaligned accesses.
 # Not all LoongArch cores support unaligned access, and as kernel we can't
 # rely on others to provide emulation for these accesses.
 KBUILD_CFLAGS += $(call cc-option,-mstrict-align)
+else
+# Optimise for performance on hardware that supports unaligned access.
+KBUILD_CFLAGS += $(call cc-option,-mno-strict-align)
+endif
 
 KBUILD_CFLAGS += -isystem $(shell $(CC) -print-file-name=include)
 
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index eb84cae642e5874cb8c09f3b06d9b8044bf6c423..e18213f01cc478e6466addbfcb72174626ab8718 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -48,6 +48,7 @@ CONFIG_HOTPLUG_CPU=y
 CONFIG_NR_CPUS=64
 CONFIG_NUMA=y
 CONFIG_KEXEC=y
+CONFIG_CRASH_DUMP=y
 CONFIG_SUSPEND=y
 CONFIG_HIBERNATION=y
 CONFIG_ACPI=y
diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h
index d342935e5a72d1de92c496087ba3f2e96e46d352..8fb699b4d40afb0ac7ccdeaf30a41e9783e30d26 100644
--- a/arch/loongarch/include/asm/addrspace.h
+++ b/arch/loongarch/include/asm/addrspace.h
@@ -125,4 +125,6 @@ extern unsigned long vm_map_base;
 #define ISA_IOSIZE	SZ_16K
 #define IO_SPACE_LIMIT	(PCI_IOSIZE - 1)
 
+#define PHYS_LINK_KADDR	PHYSADDR(VMLINUX_LOAD_ADDRESS)
+
 #endif /* _ASM_ADDRSPACE_H */
diff --git a/arch/loongarch/include/asm/asm.h b/arch/loongarch/include/asm/asm.h
index 40eea6aa469e17c0ca5847da98046171cb590762..f591b3245def69d3deff2895e6c2d9f064225cb9 100644
--- a/arch/loongarch/include/asm/asm.h
+++ b/arch/loongarch/include/asm/asm.h
@@ -188,4 +188,14 @@
 #define PTRLOG		3
 #endif
 
+/* Annotate a function as being unsuitable for kprobes. */
+#ifdef CONFIG_KPROBES
+#define _ASM_NOKPROBE(name)				\
+	.pushsection "_kprobe_blacklist", "aw";		\
+	.quad	name;					\
+	.popsection
+#else
+#define _ASM_NOKPROBE(name)
+#endif
+
 #endif /* __ASM_ASM_H */
diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h
index be037a40580d4b017725564e59ec0c511373228a..c51a1b43acb4458da6bddb6a5952b3448d036c4b 100644
--- a/arch/loongarch/include/asm/asmmacro.h
+++ b/arch/loongarch/include/asm/asmmacro.h
@@ -274,4 +274,21 @@
 	nor	\dst, \src, zero
 .endm
 
+.macro la_abs reg, sym
+#ifndef CONFIG_RELOCATABLE
+	la.abs	\reg, \sym
+#else
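+	/*
+	 * Emit the 4-insn absolute load with zeroed immediates here;
+	 * relocate_kernel() patches in the real address at boot using the
+	 * record emitted into the .la_abs section below.
+	 */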
+	766:
+	lu12i.w	\reg, 0
+	ori	\reg, \reg, 0
+	lu32i.d	\reg, 0
+	lu52i.d	\reg, \reg, 0
+	.pushsection ".la_abs", "aw", %progbits
+	768:
+	.dword	768b-766b
+	.dword	\sym
+	.popsection
+#endif
+.endm
+
 #endif /* _ASM_ASMMACRO_H */
diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h
index 754f2850679134ee81ec9a9966ea170d88d85a7f..c3da91759472841c0f15e71b5f03603077ae6d0d 100644
--- a/arch/loongarch/include/asm/cpu.h
+++ b/arch/loongarch/include/asm/cpu.h
@@ -36,7 +36,7 @@
 
 #define PRID_SERIES_LA132	0x8000  /* Loongson 32bit */
 #define PRID_SERIES_LA264	0xa000  /* Loongson 64bit, 2-issue */
-#define PRID_SERIES_LA364	0xb000  /* Loongson 64bit,3-issue */
+#define PRID_SERIES_LA364	0xb000  /* Loongson 64bit, 3-issue */
 #define PRID_SERIES_LA464	0xc000  /* Loongson 64bit, 4-issue */
 #define PRID_SERIES_LA664	0xd000  /* Loongson 64bit, 6-issue */
 
diff --git a/arch/loongarch/include/asm/hw_breakpoint.h b/arch/loongarch/include/asm/hw_breakpoint.h
new file mode 100644
index 0000000000000000000000000000000000000000..21447fb1efc778a12c828dd9034fd8ba51db0263
--- /dev/null
+++ b/arch/loongarch/include/asm/hw_breakpoint.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2023 Loongson Technology Corporation Limited
+ */
+#ifndef __ASM_HW_BREAKPOINT_H
+#define __ASM_HW_BREAKPOINT_H
+
+#include <asm/loongarch.h>
+
+#ifdef __KERNEL__
+
+/* Breakpoint */
+#define LOONGARCH_BREAKPOINT_EXECUTE		(0 << 0)
+
+/* Watchpoints */
+#define LOONGARCH_BREAKPOINT_LOAD		(1 << 0)
+#define LOONGARCH_BREAKPOINT_STORE		(1 << 1)
+
+struct arch_hw_breakpoint_ctrl {
+	u32 __reserved	: 28,
+	len		: 2,
+	type		: 2;
+};
+
+struct arch_hw_breakpoint {
+	u64 address;
+	u64 mask;
+	struct arch_hw_breakpoint_ctrl ctrl;
+};
+
+/* Lengths */
+#define LOONGARCH_BREAKPOINT_LEN_1    0b11
+#define LOONGARCH_BREAKPOINT_LEN_2    0b10
+#define LOONGARCH_BREAKPOINT_LEN_4    0b01
+#define LOONGARCH_BREAKPOINT_LEN_8    0b00
+
+/*
+ * Limits.
+ * Changing these will require modifications to the register accessors.
+ */
+#define LOONGARCH_MAX_BRP		8
+#define LOONGARCH_MAX_WRP		8
+
+/* Virtual debug register bases. */
+#define CSR_CFG_ADDR	0
+#define CSR_CFG_MASK	(CSR_CFG_ADDR + LOONGARCH_MAX_BRP)
+#define CSR_CFG_CTRL	(CSR_CFG_MASK + LOONGARCH_MAX_BRP)
+#define CSR_CFG_ASID	(CSR_CFG_CTRL + LOONGARCH_MAX_WRP)
+
+/* Debug register names. */
+#define LOONGARCH_CSR_NAME_ADDR	ADDR
+#define LOONGARCH_CSR_NAME_MASK	MASK
+#define LOONGARCH_CSR_NAME_CTRL	CTRL
+#define LOONGARCH_CSR_NAME_ASID	ASID
+
+/* Accessor macros for the debug registers. */
+#define LOONGARCH_CSR_WATCH_READ(N, REG, T, VAL)			\
+do {								\
+	if (T == 0)						\
+		VAL = csr_read64(LOONGARCH_CSR_##IB##N##REG);	\
+	else							\
+		VAL = csr_read64(LOONGARCH_CSR_##DB##N##REG);	\
+} while (0)
+
+#define LOONGARCH_CSR_WATCH_WRITE(N, REG, T, VAL)			\
+do {								\
+	if (T == 0)						\
+		csr_write64(VAL, LOONGARCH_CSR_##IB##N##REG);	\
+	else							\
+		csr_write64(VAL, LOONGARCH_CSR_##DB##N##REG);	\
+} while (0)
+
+/* Exact number */
+#define CSR_FWPC_NUM		0x3f
+#define CSR_MWPC_NUM		0x3f
+
+#define CTRL_PLV_ENABLE		0x1e
+
+#define MWPnCFG3_LoadEn		8
+#define MWPnCFG3_StoreEn	9
+
+#define MWPnCFG3_Type_mask	0x3
+#define MWPnCFG3_Size_mask	0x3
+
+static inline u32 encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl)
+{
+	return (ctrl.len << 10) | (ctrl.type << 8);
+}
+
+static inline void decode_ctrl_reg(u32 reg, struct arch_hw_breakpoint_ctrl *ctrl)
+{
+	reg >>= 8;
+	ctrl->type = reg & MWPnCFG3_Type_mask;
+	reg >>= 2;
+	ctrl->len  = reg & MWPnCFG3_Size_mask;
+}
+
+struct task_struct;
+struct notifier_block;
+struct perf_event;
+struct perf_event_attr;
+
+extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
+				  int *gen_len, int *gen_type, int *offset);
+extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
+extern int hw_breakpoint_arch_parse(struct perf_event *bp,
+				    const struct perf_event_attr *attr,
+				    struct arch_hw_breakpoint *hw);
+extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
+					   unsigned long val, void *data);
+
+extern int arch_install_hw_breakpoint(struct perf_event *bp);
+extern void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+extern int hw_breakpoint_slots(int type);
+extern void hw_breakpoint_pmu_read(struct perf_event *bp);
+
+void breakpoint_handler(struct pt_regs *regs);
+void watchpoint_handler(struct pt_regs *regs);
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+extern void ptrace_hw_copy_thread(struct task_struct *task);
+extern void hw_breakpoint_thread_switch(struct task_struct *next);
+#else
+static inline void ptrace_hw_copy_thread(struct task_struct *task)
+{
+}
+static inline void hw_breakpoint_thread_switch(struct task_struct *next)
+{
+}
+#endif
+
+/* Determine number of BRP registers available. */
+static inline int get_num_brps(void)
+{
+	return csr_read64(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM;
+}
+
+/* Determine number of WRP registers available. */
+static inline int get_num_wrps(void)
+{
+	return csr_read64(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM;
+}
+
+#endif	/* __KERNEL__ */
+#endif	/* __ASM_HW_BREAKPOINT_H */
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index 7eedd83fd0d72127fc570cd87dc759f1d8b6b12f..a04fe755d71932d298189b608f8d786fd3c6edef 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -7,6 +7,7 @@
 
 #include <linux/types.h>
 #include <asm/asm.h>
+#include <asm/ptrace.h>
 
 #define INSN_NOP		0x03400000
 #define INSN_BREAK		0x002a0000
@@ -23,6 +24,10 @@
 
 #define ADDR_IMM(addr, INSN)	((addr & ADDR_IMMMASK_##INSN) >> ADDR_IMMSHIFT_##INSN)
 
+enum reg0i15_op {
+	break_op	= 0x54,
+};
+
 enum reg0i26_op {
 	b_op		= 0x14,
 	bl_op		= 0x15,
@@ -32,6 +37,7 @@ enum reg1i20_op {
 	lu12iw_op	= 0x0a,
 	lu32id_op	= 0x0b,
 	pcaddi_op	= 0x0c,
+	pcalau12i_op	= 0x0d,
 	pcaddu12i_op	= 0x0e,
 	pcaddu18i_op	= 0x0f,
 };
@@ -178,6 +184,11 @@ enum reg3sa2_op {
 	alsld_op	= 0x16,
 };
 
+struct reg0i15_format {
+	unsigned int immediate : 15;
+	unsigned int opcode : 17;
+};
+
 struct reg0i26_format {
 	unsigned int immediate_h : 10;
 	unsigned int immediate_l : 16;
@@ -263,6 +274,7 @@ struct reg3sa2_format {
 
 union loongarch_instruction {
 	unsigned int word;
+	struct reg0i15_format	reg0i15_format;
 	struct reg0i26_format	reg0i26_format;
 	struct reg1i20_format	reg1i20_format;
 	struct reg1i21_format	reg1i21_format;
@@ -321,6 +333,11 @@ static inline bool is_imm_negative(unsigned long val, unsigned int bit)
 	return val & (1UL << (bit - 1));
 }
 
+static inline bool is_break_ins(union loongarch_instruction *ip)
+{
+	return ip->reg0i15_format.opcode == break_op;
+}
+
 static inline bool is_pc_ins(union loongarch_instruction *ip)
 {
 	return ip->reg1i20_format.opcode >= pcaddi_op &&
@@ -351,6 +368,47 @@ static inline bool is_stack_alloc_ins(union loongarch_instruction *ip)
 		is_imm12_negative(ip->reg2i12_format.immediate);
 }
 
+static inline bool is_self_loop_ins(union loongarch_instruction *ip, struct pt_regs *regs)
+{
+	switch (ip->reg0i26_format.opcode) {
+	case b_op:
+	case bl_op:
+		if (ip->reg0i26_format.immediate_l == 0 &&
+		    ip->reg0i26_format.immediate_h == 0)
+			return true;
+	}
+
+	switch (ip->reg1i21_format.opcode) {
+	case beqz_op:
+	case bnez_op:
+	case bceqz_op:
+		if (ip->reg1i21_format.immediate_l == 0 &&
+		    ip->reg1i21_format.immediate_h == 0)
+			return true;
+	}
+
+	switch (ip->reg2i16_format.opcode) {
+	case beq_op:
+	case bne_op:
+	case blt_op:
+	case bge_op:
+	case bltu_op:
+	case bgeu_op:
+		if (ip->reg2i16_format.immediate == 0)
+			return true;
+		break;
+	case jirl_op:
+		if (regs->regs[ip->reg2i16_format.rj] +
+		    ((unsigned long)ip->reg2i16_format.immediate << 2) == (unsigned long)ip)
+			return true;
+	}
+
+	return false;
+}
+
+void simu_pc(struct pt_regs *regs, union loongarch_instruction insn);
+void simu_branch(struct pt_regs *regs, union loongarch_instruction insn);
+
 int larch_insn_read(void *addr, u32 *insnp);
 int larch_insn_write(void *addr, u32 insn);
 int larch_insn_patch_text(void *addr, u32 insn);
diff --git a/arch/loongarch/include/asm/kprobes.h b/arch/loongarch/include/asm/kprobes.h
new file mode 100644
index 0000000000000000000000000000000000000000..798020ae02c694b8623206383ebb5d1417e6683c
--- /dev/null
+++ b/arch/loongarch/include/asm/kprobes.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_LOONGARCH_KPROBES_H
+#define __ASM_LOONGARCH_KPROBES_H
+
+#include <asm-generic/kprobes.h>
+
+#ifdef CONFIG_KPROBES
+
+#include <asm/inst.h>
+#include <asm/cacheflush.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
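+/* Slot layout: insn[0] = copied instruction, insn[1] = single-step break */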
+#define MAX_INSN_SIZE			2
+
+#define flush_insn_slot(p)						\
+do {									\
+	if (p->addr)							\
+		flush_icache_range((unsigned long)p->addr,		\
+			   (unsigned long)p->addr +			\
+			   (MAX_INSN_SIZE * sizeof(kprobe_opcode_t)));	\
+} while (0)
+
+#define kretprobe_blacklist_size	0
+
+typedef union loongarch_instruction kprobe_opcode_t;
+
+/* Architecture specific copy of original instruction */
+struct arch_specific_insn {
+	/* copy of the original instruction */
+	kprobe_opcode_t *insn;
+	/* restore address after simulation */
+	unsigned long restore;
+};
+
+struct prev_kprobe {
+	struct kprobe *kp;
+	unsigned int status;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+	unsigned int kprobe_status;
+	unsigned long saved_status;
+	struct prev_kprobe prev_kprobe;
+};
+
+void arch_remove_kprobe(struct kprobe *p);
+bool kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+bool kprobe_breakpoint_handler(struct pt_regs *regs);
+bool kprobe_singlestep_handler(struct pt_regs *regs);
+
+void __kretprobe_trampoline(void);
+void *trampoline_probe_handler(struct pt_regs *regs);
+
+#else /* !CONFIG_KPROBES */
+
+static inline bool kprobe_breakpoint_handler(struct pt_regs *regs) { return false; }
+static inline bool kprobe_singlestep_handler(struct pt_regs *regs) { return false; }
+
+#endif /* CONFIG_KPROBES */
+#endif /* __ASM_LOONGARCH_KPROBES_H */
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 7f8d57a61c8bdd1c7cd9d2d77fdbe23bc10e89e3..65b7dcdea16d0f4f59bfed5cbee2444767327a6c 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -970,42 +970,42 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
 
 #define LOONGARCH_CSR_DB0ADDR		0x310	/* data breakpoint 0 address */
 #define LOONGARCH_CSR_DB0MASK		0x311	/* data breakpoint 0 mask */
-#define LOONGARCH_CSR_DB0CTL		0x312	/* data breakpoint 0 control */
+#define LOONGARCH_CSR_DB0CTRL		0x312	/* data breakpoint 0 control */
 #define LOONGARCH_CSR_DB0ASID		0x313	/* data breakpoint 0 asid */
 
 #define LOONGARCH_CSR_DB1ADDR		0x318	/* data breakpoint 1 address */
 #define LOONGARCH_CSR_DB1MASK		0x319	/* data breakpoint 1 mask */
-#define LOONGARCH_CSR_DB1CTL		0x31a	/* data breakpoint 1 control */
+#define LOONGARCH_CSR_DB1CTRL		0x31a	/* data breakpoint 1 control */
 #define LOONGARCH_CSR_DB1ASID		0x31b	/* data breakpoint 1 asid */
 
 #define LOONGARCH_CSR_DB2ADDR		0x320	/* data breakpoint 2 address */
 #define LOONGARCH_CSR_DB2MASK		0x321	/* data breakpoint 2 mask */
-#define LOONGARCH_CSR_DB2CTL		0x322	/* data breakpoint 2 control */
+#define LOONGARCH_CSR_DB2CTRL		0x322	/* data breakpoint 2 control */
 #define LOONGARCH_CSR_DB2ASID		0x323	/* data breakpoint 2 asid */
 
 #define LOONGARCH_CSR_DB3ADDR		0x328	/* data breakpoint 3 address */
 #define LOONGARCH_CSR_DB3MASK		0x329	/* data breakpoint 3 mask */
-#define LOONGARCH_CSR_DB3CTL		0x32a	/* data breakpoint 3 control */
+#define LOONGARCH_CSR_DB3CTRL		0x32a	/* data breakpoint 3 control */
 #define LOONGARCH_CSR_DB3ASID		0x32b	/* data breakpoint 3 asid */
 
 #define LOONGARCH_CSR_DB4ADDR		0x330	/* data breakpoint 4 address */
 #define LOONGARCH_CSR_DB4MASK		0x331	/* data breakpoint 4 mask */
-#define LOONGARCH_CSR_DB4CTL		0x332	/* data breakpoint 4 control */
+#define LOONGARCH_CSR_DB4CTRL		0x332	/* data breakpoint 4 control */
 #define LOONGARCH_CSR_DB4ASID		0x333	/* data breakpoint 4 asid */
 
 #define LOONGARCH_CSR_DB5ADDR		0x338	/* data breakpoint 5 address */
 #define LOONGARCH_CSR_DB5MASK		0x339	/* data breakpoint 5 mask */
-#define LOONGARCH_CSR_DB5CTL		0x33a	/* data breakpoint 5 control */
+#define LOONGARCH_CSR_DB5CTRL		0x33a	/* data breakpoint 5 control */
 #define LOONGARCH_CSR_DB5ASID		0x33b	/* data breakpoint 5 asid */
 
 #define LOONGARCH_CSR_DB6ADDR		0x340	/* data breakpoint 6 address */
 #define LOONGARCH_CSR_DB6MASK		0x341	/* data breakpoint 6 mask */
-#define LOONGARCH_CSR_DB6CTL		0x342	/* data breakpoint 6 control */
+#define LOONGARCH_CSR_DB6CTRL		0x342	/* data breakpoint 6 control */
 #define LOONGARCH_CSR_DB6ASID		0x343	/* data breakpoint 6 asid */
 
 #define LOONGARCH_CSR_DB7ADDR		0x348	/* data breakpoint 7 address */
 #define LOONGARCH_CSR_DB7MASK		0x349	/* data breakpoint 7 mask */
-#define LOONGARCH_CSR_DB7CTL		0x34a	/* data breakpoint 7 control */
+#define LOONGARCH_CSR_DB7CTRL		0x34a	/* data breakpoint 7 control */
 #define LOONGARCH_CSR_DB7ASID		0x34b	/* data breakpoint 7 asid */
 
 #define LOONGARCH_CSR_FWPC		0x380	/* instruction breakpoint config */
@@ -1013,48 +1013,51 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
 
 #define LOONGARCH_CSR_IB0ADDR		0x390	/* inst breakpoint 0 address */
 #define LOONGARCH_CSR_IB0MASK		0x391	/* inst breakpoint 0 mask */
-#define LOONGARCH_CSR_IB0CTL		0x392	/* inst breakpoint 0 control */
+#define LOONGARCH_CSR_IB0CTRL		0x392	/* inst breakpoint 0 control */
 #define LOONGARCH_CSR_IB0ASID		0x393	/* inst breakpoint 0 asid */
 
 #define LOONGARCH_CSR_IB1ADDR		0x398	/* inst breakpoint 1 address */
 #define LOONGARCH_CSR_IB1MASK		0x399	/* inst breakpoint 1 mask */
-#define LOONGARCH_CSR_IB1CTL		0x39a	/* inst breakpoint 1 control */
+#define LOONGARCH_CSR_IB1CTRL		0x39a	/* inst breakpoint 1 control */
 #define LOONGARCH_CSR_IB1ASID		0x39b	/* inst breakpoint 1 asid */
 
 #define LOONGARCH_CSR_IB2ADDR		0x3a0	/* inst breakpoint 2 address */
 #define LOONGARCH_CSR_IB2MASK		0x3a1	/* inst breakpoint 2 mask */
-#define LOONGARCH_CSR_IB2CTL		0x3a2	/* inst breakpoint 2 control */
+#define LOONGARCH_CSR_IB2CTRL		0x3a2	/* inst breakpoint 2 control */
 #define LOONGARCH_CSR_IB2ASID		0x3a3	/* inst breakpoint 2 asid */
 
 #define LOONGARCH_CSR_IB3ADDR		0x3a8	/* inst breakpoint 3 address */
 #define LOONGARCH_CSR_IB3MASK		0x3a9	/* breakpoint 3 mask */
-#define LOONGARCH_CSR_IB3CTL		0x3aa	/* inst breakpoint 3 control */
+#define LOONGARCH_CSR_IB3CTRL		0x3aa	/* inst breakpoint 3 control */
 #define LOONGARCH_CSR_IB3ASID		0x3ab	/* inst breakpoint 3 asid */
 
 #define LOONGARCH_CSR_IB4ADDR		0x3b0	/* inst breakpoint 4 address */
 #define LOONGARCH_CSR_IB4MASK		0x3b1	/* inst breakpoint 4 mask */
-#define LOONGARCH_CSR_IB4CTL		0x3b2	/* inst breakpoint 4 control */
+#define LOONGARCH_CSR_IB4CTRL		0x3b2	/* inst breakpoint 4 control */
 #define LOONGARCH_CSR_IB4ASID		0x3b3	/* inst breakpoint 4 asid */
 
 #define LOONGARCH_CSR_IB5ADDR		0x3b8	/* inst breakpoint 5 address */
 #define LOONGARCH_CSR_IB5MASK		0x3b9	/* inst breakpoint 5 mask */
-#define LOONGARCH_CSR_IB5CTL		0x3ba	/* inst breakpoint 5 control */
+#define LOONGARCH_CSR_IB5CTRL		0x3ba	/* inst breakpoint 5 control */
 #define LOONGARCH_CSR_IB5ASID		0x3bb	/* inst breakpoint 5 asid */
 
 #define LOONGARCH_CSR_IB6ADDR		0x3c0	/* inst breakpoint 6 address */
 #define LOONGARCH_CSR_IB6MASK		0x3c1	/* inst breakpoint 6 mask */
-#define LOONGARCH_CSR_IB6CTL		0x3c2	/* inst breakpoint 6 control */
+#define LOONGARCH_CSR_IB6CTRL		0x3c2	/* inst breakpoint 6 control */
 #define LOONGARCH_CSR_IB6ASID		0x3c3	/* inst breakpoint 6 asid */
 
 #define LOONGARCH_CSR_IB7ADDR		0x3c8	/* inst breakpoint 7 address */
 #define LOONGARCH_CSR_IB7MASK		0x3c9	/* inst breakpoint 7 mask */
-#define LOONGARCH_CSR_IB7CTL		0x3ca	/* inst breakpoint 7 control */
+#define LOONGARCH_CSR_IB7CTRL		0x3ca	/* inst breakpoint 7 control */
 #define LOONGARCH_CSR_IB7ASID		0x3cb	/* inst breakpoint 7 asid */
 
 #define LOONGARCH_CSR_DEBUG		0x500	/* debug config */
 #define LOONGARCH_CSR_DERA		0x501	/* debug era */
 #define LOONGARCH_CSR_DESAVE		0x502	/* debug save */
 
+#define CSR_FWPC_SKIP_SHIFT		16
+#define CSR_FWPC_SKIP			(_ULCAST_(1) << CSR_FWPC_SKIP_SHIFT)
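+/* Written to CSR.FWPS so the next instruction-breakpoint match is skipped. */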
+
 /*
  * CSR_ECFG IM
  */
diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h
index 7184f1dc61f2784b399b254c6618f7bde54ddb15..636e1c66398c17c7bfb930b5b225bd241f831a45 100644
--- a/arch/loongarch/include/asm/processor.h
+++ b/arch/loongarch/include/asm/processor.h
@@ -11,6 +11,7 @@
 
 #include <asm/cpu.h>
 #include <asm/cpu-info.h>
+#include <asm/hw_breakpoint.h>
 #include <asm/loongarch.h>
 #include <asm/vdso/processor.h>
 #include <uapi/asm/ptrace.h>
@@ -124,13 +125,18 @@ struct thread_struct {
 	/* Other stuff associated with the thread. */
 	unsigned long trap_nr;
 	unsigned long error_code;
+	unsigned long single_step; /* Used by PTRACE_SINGLESTEP */
 	struct loongarch_vdso_info *vdso;
 
 	/*
-	 * FPU & vector registers, must be at last because
-	 * they are conditionally copied at fork().
+	 * FPU & vector registers, must be at the last of inherited
+	 * context because they are conditionally copied at fork().
 	 */
 	struct loongarch_fpu fpu FPU_ALIGN;
+
+	/* Hardware breakpoints pinned to this task. */
+	struct perf_event *hbp_break[LOONGARCH_MAX_BRP];
+	struct perf_event *hbp_watch[LOONGARCH_MAX_WRP];
 };
 
 #define thread_saved_ra(tsk)	(tsk->thread.sched_ra)
@@ -172,6 +178,8 @@ struct thread_struct {
 		.fcc		= 0,				\
 		.fpr		= {{{0,},},},			\
 	},							\
+	.hbp_break		= {0},				\
+	.hbp_watch		= {0},				\
 }
 
 struct task_struct;
@@ -184,10 +192,6 @@ extern unsigned long		boot_option_idle_override;
  */
 extern void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp);
 
-static inline void flush_thread(void)
-{
-}
-
 unsigned long __get_wchan(struct task_struct *p);
 
 #define __KSTK_TOS(tsk) ((unsigned long)task_stack_page(tsk) + \
diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h
index 59c4608de91db938a6eb805db3da8f7b68948278..d761db943335ce707e9f7059e677abcc05195a16 100644
--- a/arch/loongarch/include/asm/ptrace.h
+++ b/arch/loongarch/include/asm/ptrace.h
@@ -6,6 +6,7 @@
 #define _ASM_PTRACE_H
 
 #include <asm/page.h>
+#include <asm/irqflags.h>
 #include <asm/thread_info.h>
 #include <uapi/asm/ptrace.h>
 
@@ -109,6 +110,40 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsi
 
 struct task_struct;
 
+/**
+ * regs_get_kernel_argument() - get Nth function argument in kernel
+ * @regs:       pt_regs of that context
+ * @n:          function argument number (start from 0)
+ *
+ * regs_get_kernel_argument() returns the @n th argument of the function
+ * call. Note that this chooses the most likely register assignment; in
+ * some cases it can be incorrect.
+ * This is expected to be called from kprobes or ftrace with regs
+ * where the top of stack is the return address.
+ */
+static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
+						     unsigned int n)
+{
+#define NR_REG_ARGUMENTS 8
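+	/* a0-a7 are passed in regs[4]-regs[11] per the LoongArch psABI. */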
+	static const unsigned int args[] = {
+		offsetof(struct pt_regs, regs[4]),
+		offsetof(struct pt_regs, regs[5]),
+		offsetof(struct pt_regs, regs[6]),
+		offsetof(struct pt_regs, regs[7]),
+		offsetof(struct pt_regs, regs[8]),
+		offsetof(struct pt_regs, regs[9]),
+		offsetof(struct pt_regs, regs[10]),
+		offsetof(struct pt_regs, regs[11]),
+	};
+
+	if (n < NR_REG_ARGUMENTS)
+		return regs_get_register(regs, args[n]);
+	else {
+		n -= NR_REG_ARGUMENTS;
+		return regs_get_kernel_stack_nth(regs, n);
+	}
+}
+
 /*
  * Does the process account for user or for system time?
  */
@@ -149,4 +184,8 @@ static inline void user_stack_pointer_set(struct pt_regs *regs,
 	regs->regs[3] = val;
 }
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+#define arch_has_single_step()		(1)
+#endif
+
 #endif /* _ASM_PTRACE_H */
diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h
index 72ead58039f3e159ae13c7e066c4ee9793aa2208..be05c0e706a2e23d1e5c82859f02be62c82ce0b9 100644
--- a/arch/loongarch/include/asm/setup.h
+++ b/arch/loongarch/include/asm/setup.h
@@ -21,4 +21,20 @@ extern void per_cpu_trap_init(int cpu);
 extern void set_handler(unsigned long offset, void *addr, unsigned long len);
 extern void set_merr_handler(unsigned long offset, void *addr, unsigned long len);
 
+#ifdef CONFIG_RELOCATABLE
+
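+/*
+ * One record per la_abs site: offset locates the instruction sequence to
+ * patch (relative to this record), symvalue is the symbol's address.
+ */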
+struct rela_la_abs {
+	long offset;
+	long symvalue;
+};
+
+extern long __la_abs_begin;
+extern long __la_abs_end;
+extern long __rela_dyn_begin;
+extern long __rela_dyn_end;
+
+extern void * __init relocate_kernel(void);
+
+#endif
+
 #endif /* __SETUP_H */
diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h
index 4ca953062b5be2c2a76f5176c7e4977284981e6b..7df80e6ae9d2c8d76af305b7478bd52776ed380a 100644
--- a/arch/loongarch/include/asm/stackframe.h
+++ b/arch/loongarch/include/asm/stackframe.h
@@ -7,6 +7,7 @@
 
 #include <linux/threads.h>
 
+#include <asm/addrspace.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/asm-offsets.h>
@@ -36,6 +37,14 @@
 	cfi_restore \reg \offset \docfi
 	.endm
 
+/* Jump to the runtime virtual address. */
+	.macro JUMP_VIRT_ADDR temp1 temp2
+	li.d	\temp1, CACHE_BASE
+	pcaddi	\temp2, 0
+	or	\temp1, \temp1, \temp2
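+	/*
+	 * \temp2 holds the PC of the pcaddi above; OR-ing in CACHE_BASE forms
+	 * its cached virtual alias, and offset 0xc skips the three
+	 * instructions from pcaddi through jirl.
+	 */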
+	jirl	zero, \temp1, 0xc
+	.endm
+
 	.macro BACKUP_T0T1
 	csrwr	t0, EXCEPTION_KS0
 	csrwr	t1, EXCEPTION_KS1
@@ -77,7 +86,7 @@
  * new value in sp.
  */
 	.macro	get_saved_sp docfi=0
-	la.abs	  t1, kernelsp
+	la_abs	  t1, kernelsp
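+	/* get_saved_sp runs in exception handlers that may be copied away
+	   from their link address (see set_handler()), so kernelsp cannot
+	   be addressed PC-relatively here. */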
 #ifdef CONFIG_SMP
 	csrrd	  t0, PERCPU_BASE_KS
 	LONG_ADD  t1, t1, t0
@@ -90,7 +99,7 @@
 	.endm
 
 	.macro	set_saved_sp stackp temp temp2
-	la.abs	  \temp, kernelsp
+	la.pcrel  \temp, kernelsp
 #ifdef CONFIG_SMP
 	LONG_ADD  \temp, \temp, u0
 #endif
diff --git a/arch/loongarch/include/asm/switch_to.h b/arch/loongarch/include/asm/switch_to.h
index 43a5ab162d38b917781a14acf3e3da92411d8a50..24e3094bebab166c046264280ef9b5929b350377 100644
--- a/arch/loongarch/include/asm/switch_to.h
+++ b/arch/loongarch/include/asm/switch_to.h
@@ -34,6 +34,7 @@ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev,
 #define switch_to(prev, next, last)						\
 do {										\
 	lose_fpu_inatomic(1, prev);						\
+	hw_breakpoint_thread_switch(next);					\
 	(last) = __switch_to(prev, next, task_thread_info(next),		\
 		 __builtin_return_address(0), __builtin_frame_address(0));	\
 } while (0)
diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h
index 255899d4a7c36a73ad9802b6f71c4bf75a0ae9f6..0d22991ae430d4598743743a07ce6f925a01d4e6 100644
--- a/arch/loongarch/include/asm/uaccess.h
+++ b/arch/loongarch/include/asm/uaccess.h
@@ -22,7 +22,6 @@
 extern u64 __ua_limit;
 
 #define __UA_ADDR	".dword"
-#define __UA_LA		"la.abs"
 #define __UA_LIMIT	__ua_limit
 
 /*
diff --git a/arch/loongarch/include/uapi/asm/ptrace.h b/arch/loongarch/include/uapi/asm/ptrace.h
index 083193f4a5d5d8e1771872b6f90610347f8fde01..cc48ed262021244bcbcc7e0f98b63d8dc7e4b9de 100644
--- a/arch/loongarch/include/uapi/asm/ptrace.h
+++ b/arch/loongarch/include/uapi/asm/ptrace.h
@@ -46,6 +46,15 @@ struct user_fp_state {
 	uint32_t    fcsr;
 };
 
+struct user_watch_state {
+	uint16_t dbg_info;
+	struct {
+		uint64_t    addr;
+		uint64_t    mask;
+		uint32_t    ctrl;
+	} dbg_regs[8];
+};
+
 #define PTRACE_SYSEMU			0x1f
 #define PTRACE_SYSEMU_SINGLESTEP	0x20
 
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index c8cfbd562921d63fe2a15aa6fb8a6940ff356fd4..78d4e33843054a98be9827f464e0a666efc1ab6e 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -8,13 +8,15 @@ extra-y		:= vmlinux.lds
 obj-y		+= head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
 		   traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \
 		   elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o \
-		   alternative.o unaligned.o unwind.o
+		   alternative.o unwind.o
 
 obj-$(CONFIG_ACPI)		+= acpi.o
 obj-$(CONFIG_EFI) 		+= efi.o
 
 obj-$(CONFIG_CPU_HAS_FPU)	+= fpu.o
 
+obj-$(CONFIG_ARCH_STRICT_ALIGN)	+= unaligned.o
+
 ifdef CONFIG_FUNCTION_TRACER
   ifndef CONFIG_DYNAMIC_FTRACE
     obj-y += mcount.o ftrace.o
@@ -39,6 +41,8 @@ obj-$(CONFIG_NUMA)		+= numa.o
 
 obj-$(CONFIG_MAGIC_SYSRQ)	+= sysrq.o
 
+obj-$(CONFIG_RELOCATABLE)	+= relocate.o
+
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 
@@ -46,5 +50,8 @@ obj-$(CONFIG_UNWINDER_GUESS)	+= unwind_guess.o
 obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o
 
 obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_regs.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
+
+obj-$(CONFIG_KPROBES)		+= kprobes.o kprobes_trampoline.o
 
 CPPFLAGS_vmlinux.lds		:= $(KBUILD_CFLAGS)
diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
index d53b631c90227cc4bbaf06192b90404afcc53493..d737e3cf42d3fd8ca882e08113d58d2887db32c1 100644
--- a/arch/loongarch/kernel/entry.S
+++ b/arch/loongarch/kernel/entry.S
@@ -19,70 +19,71 @@
 	.cfi_sections	.debug_frame
 	.align	5
 SYM_FUNC_START(handle_syscall)
-	csrrd	t0, PERCPU_BASE_KS
-	la.abs	t1, kernelsp
-	add.d	t1, t1, t0
-	move	t2, sp
-	ld.d	sp, t1, 0
+	csrrd		t0, PERCPU_BASE_KS
+	la.pcrel	t1, kernelsp
+	add.d		t1, t1, t0
+	move		t2, sp
+	ld.d		sp, t1, 0
 
-	addi.d	sp, sp, -PT_SIZE
-	cfi_st	t2, PT_R3
+	addi.d		sp, sp, -PT_SIZE
+	cfi_st		t2, PT_R3
 	cfi_rel_offset	sp, PT_R3
-	st.d	zero, sp, PT_R0
-	csrrd	t2, LOONGARCH_CSR_PRMD
-	st.d	t2, sp, PT_PRMD
-	csrrd	t2, LOONGARCH_CSR_CRMD
-	st.d	t2, sp, PT_CRMD
-	csrrd	t2, LOONGARCH_CSR_EUEN
-	st.d	t2, sp, PT_EUEN
-	csrrd	t2, LOONGARCH_CSR_ECFG
-	st.d	t2, sp, PT_ECFG
-	csrrd	t2, LOONGARCH_CSR_ESTAT
-	st.d	t2, sp, PT_ESTAT
-	cfi_st	ra, PT_R1
-	cfi_st	a0, PT_R4
-	cfi_st	a1, PT_R5
-	cfi_st	a2, PT_R6
-	cfi_st	a3, PT_R7
-	cfi_st	a4, PT_R8
-	cfi_st	a5, PT_R9
-	cfi_st	a6, PT_R10
-	cfi_st	a7, PT_R11
-	csrrd	ra, LOONGARCH_CSR_ERA
-	st.d	ra, sp, PT_ERA
+	st.d		zero, sp, PT_R0
+	csrrd		t2, LOONGARCH_CSR_PRMD
+	st.d		t2, sp, PT_PRMD
+	csrrd		t2, LOONGARCH_CSR_CRMD
+	st.d		t2, sp, PT_CRMD
+	csrrd		t2, LOONGARCH_CSR_EUEN
+	st.d		t2, sp, PT_EUEN
+	csrrd		t2, LOONGARCH_CSR_ECFG
+	st.d		t2, sp, PT_ECFG
+	csrrd		t2, LOONGARCH_CSR_ESTAT
+	st.d		t2, sp, PT_ESTAT
+	cfi_st		ra, PT_R1
+	cfi_st		a0, PT_R4
+	cfi_st		a1, PT_R5
+	cfi_st		a2, PT_R6
+	cfi_st		a3, PT_R7
+	cfi_st		a4, PT_R8
+	cfi_st		a5, PT_R9
+	cfi_st		a6, PT_R10
+	cfi_st		a7, PT_R11
+	csrrd		ra, LOONGARCH_CSR_ERA
+	st.d		ra, sp, PT_ERA
 	cfi_rel_offset	ra, PT_ERA
 
-	cfi_st	tp, PT_R2
-	cfi_st	u0, PT_R21
-	cfi_st	fp, PT_R22
+	cfi_st		tp, PT_R2
+	cfi_st		u0, PT_R21
+	cfi_st		fp, PT_R22
 
 	SAVE_STATIC
 
-	move	u0, t0
-	li.d	tp, ~_THREAD_MASK
-	and	tp, tp, sp
+	move		u0, t0
+	li.d		tp, ~_THREAD_MASK
+	and		tp, tp, sp
 
-	move	a0, sp
-	bl	do_syscall
+	move		a0, sp
+	bl		do_syscall
 
 	RESTORE_ALL_AND_RET
 SYM_FUNC_END(handle_syscall)
+_ASM_NOKPROBE(handle_syscall)
 
 SYM_CODE_START(ret_from_fork)
-	bl	schedule_tail		# a0 = struct task_struct *prev
-	move	a0, sp
-	bl 	syscall_exit_to_user_mode
+	bl		schedule_tail		# a0 = struct task_struct *prev
+	move		a0, sp
+	bl		syscall_exit_to_user_mode
 	RESTORE_STATIC
 	RESTORE_SOME
 	RESTORE_SP_AND_RET
 SYM_CODE_END(ret_from_fork)
 
 SYM_CODE_START(ret_from_kernel_thread)
-	bl	schedule_tail		# a0 = struct task_struct *prev
-	move	a0, s1
-	jirl	ra, s0, 0
-	move	a0, sp
-	bl	syscall_exit_to_user_mode
+	bl		schedule_tail		# a0 = struct task_struct *prev
+	move		a0, s1
+	jirl		ra, s0, 0
+	move		a0, sp
+	bl		syscall_exit_to_user_mode
 	RESTORE_STATIC
 	RESTORE_SOME
 	RESTORE_SP_AND_RET
diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c
index 0f07591cab3096b6cb620cecc56ec508c7dddc31..4a3ef8516ccc6ff92cb73f7afe2c3361bec1a124 100644
--- a/arch/loongarch/kernel/ftrace_dyn.c
+++ b/arch/loongarch/kernel/ftrace_dyn.c
@@ -6,6 +6,7 @@
  */
 
 #include <linux/ftrace.h>
+#include <linux/kprobes.h>
 #include <linux/uaccess.h>
 
 #include <asm/inst.h>
@@ -271,3 +272,66 @@ int ftrace_disable_ftrace_graph_caller(void)
 }
 #endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_KPROBES_ON_FTRACE
+/* Ftrace callback handler for kprobes -- called with preemption disabled */
+void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+			   struct ftrace_ops *ops, struct ftrace_regs *fregs)
+{
+	int bit;
+	struct pt_regs *regs;
+	struct kprobe *p;
+	struct kprobe_ctlblk *kcb;
+
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
+	if (bit < 0)
+		return;
+
+	p = get_kprobe((kprobe_opcode_t *)ip);
+	if (unlikely(!p) || kprobe_disabled(p))
+		goto out;
+
+	regs = ftrace_get_regs(fregs);
+	if (!regs)
+		goto out;
+
+	kcb = get_kprobe_ctlblk();
+	if (kprobe_running()) {
+		kprobes_inc_nmissed_count(p);
+	} else {
+		unsigned long orig_ip = instruction_pointer(regs);
+
+		instruction_pointer_set(regs, ip);
+
+		__this_cpu_write(current_kprobe, p);
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+		if (!p->pre_handler || !p->pre_handler(p, regs)) {
+			/*
+			 * Emulate singlestep (and also recover regs->csr_era)
+			 * as if there is a nop
+			 */
+			instruction_pointer_set(regs, (unsigned long)p->addr + MCOUNT_INSN_SIZE);
+			if (unlikely(p->post_handler)) {
+				kcb->kprobe_status = KPROBE_HIT_SSDONE;
+				p->post_handler(p, regs, 0);
+			}
+			instruction_pointer_set(regs, orig_ip);
+		}
+
+		/*
+		 * If pre_handler returns !0, it changes regs->csr_era. We have to
+		 * skip emulating post_handler.
+		 */
+		__this_cpu_write(current_kprobe, NULL);
+	}
+out:
+	ftrace_test_recursion_unlock(bit);
+}
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
+
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
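+	/* No out-of-line single-step slot: the probe fires from kprobe_ftrace_handler(). */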
+	p->ainsn.insn = NULL;
+	return 0;
+}
+#endif /* CONFIG_KPROBES_ON_FTRACE */
diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S
index 7e5c293ed89f70c4872d54dfcad422673dc65112..44ff1ff6426011bb0aaf26f12d9dd62ebe182ed3 100644
--- a/arch/loongarch/kernel/genex.S
+++ b/arch/loongarch/kernel/genex.S
@@ -34,7 +34,7 @@ SYM_FUNC_END(__arch_cpu_idle)
 SYM_FUNC_START(handle_vint)
 	BACKUP_T0T1
 	SAVE_ALL
-	la.abs	t1, __arch_cpu_idle
+	la_abs	t1, __arch_cpu_idle
 	LONG_L	t0, sp, PT_ERA
 	/* 32 byte rollback region */
 	ori	t0, t0, 0x1f
@@ -43,7 +43,7 @@ SYM_FUNC_START(handle_vint)
 	LONG_S	t0, sp, PT_ERA
 1:	move	a0, sp
 	move	a1, sp
-	la.abs	t0, do_vint
+	la_abs	t0, do_vint
 	jirl	ra, t0, 0
 	RESTORE_ALL_AND_RET
 SYM_FUNC_END(handle_vint)
@@ -72,7 +72,7 @@ SYM_FUNC_END(except_vec_cex)
 	SAVE_ALL
 	build_prep_\prep
 	move	a0, sp
-	la.abs	t0, do_\handler
+	la_abs	t0, do_\handler
 	jirl	ra, t0, 0
 	668:
 	RESTORE_ALL_AND_RET
@@ -93,6 +93,6 @@ SYM_FUNC_END(except_vec_cex)
 	BUILD_HANDLER reserved reserved none	/* others */
 
 SYM_FUNC_START(handle_sys)
-	la.abs	t0, handle_syscall
+	la_abs	t0, handle_syscall
 	jr	t0
 SYM_FUNC_END(handle_sys)
diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index 57bada6b4e9310983a8cb3b601bbca86e465ae8c..aa64b179744f52ac4332bfe82302490e14a8210a 100644
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -24,7 +24,7 @@ _head:
 	.org	0x8
 	.dword	kernel_entry		/* Kernel entry point */
 	.dword	_end - _text		/* Kernel image effective size */
-	.quad	0			/* Kernel image load offset from start of RAM */
+	.quad	PHYS_LINK_KADDR		/* Kernel image load offset from start of RAM */
 	.org	0x38			/* 0x20 ~ 0x37 reserved */
 	.long	LINUX_PE_MAGIC
 	.long	pe_header - _head	/* Offset to the PE header */
@@ -50,11 +50,8 @@ SYM_CODE_START(kernel_entry)			# kernel entry point
 	li.d		t0, CSR_DMW1_INIT	# CA, PLV0, 0x9000 xxxx xxxx xxxx
 	csrwr		t0, LOONGARCH_CSR_DMWIN1
 
-	/* We might not get launched at the address the kernel is linked to,
-	   so we jump there.  */
-	la.abs		t0, 0f
-	jr		t0
-0:
+	JUMP_VIRT_ADDR	t0, t1
+
 	/* Enable PG */
 	li.w		t0, 0xb0		# PLV=0, IE=0, PG=1
 	csrwr		t0, LOONGARCH_CSR_CRMD
@@ -89,6 +86,23 @@ SYM_CODE_START(kernel_entry)			# kernel entry point
 	PTR_ADD		sp, sp, tp
 	set_saved_sp	sp, t0, t1
 
+#ifdef CONFIG_RELOCATABLE
+
+	bl		relocate_kernel
+
+#ifdef CONFIG_RANDOMIZE_BASE
+	/* Repoint the sp into the new kernel */
+	PTR_LI		sp, (_THREAD_SIZE - PT_SIZE)
+	PTR_ADD		sp, sp, tp
+	set_saved_sp	sp, t0, t1
+#endif
+
+	/* relocate_kernel() returns the new kernel entry point */
+	jr		a0
+	ASM_BUG()
+
+#endif
+
 	bl		start_kernel
 	ASM_BUG()
 
@@ -106,9 +120,8 @@ SYM_CODE_START(smpboot_entry)
 	li.d		t0, CSR_DMW1_INIT	# CA, PLV0
 	csrwr		t0, LOONGARCH_CSR_DMWIN1
 
-	la.abs		t0, 0f
-	jr		t0
-0:
+	JUMP_VIRT_ADDR	t0, t1
+
 	/* Enable PG */
 	li.w		t0, 0xb0		# PLV=0, IE=0, PG=1
 	csrwr		t0, LOONGARCH_CSR_CRMD
@@ -117,7 +130,7 @@ SYM_CODE_START(smpboot_entry)
 	li.w		t0, 0x00		# FPE=0, SXE=0, ASXE=0, BTE=0
 	csrwr		t0, LOONGARCH_CSR_EUEN
 
-	la.abs		t0, cpuboot_data
+	la.pcrel	t0, cpuboot_data
 	ld.d		sp, t0, CPU_BOOT_STACK
 	ld.d		tp, t0, CPU_BOOT_TINFO
 
diff --git a/arch/loongarch/kernel/hw_breakpoint.c b/arch/loongarch/kernel/hw_breakpoint.c
new file mode 100644
index 0000000000000000000000000000000000000000..2406c95b34cc4f0023f1db8755e60ff7ea0cc18a
--- /dev/null
+++ b/arch/loongarch/kernel/hw_breakpoint.c
@@ -0,0 +1,548 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2023 Loongson Technology Corporation Limited
+ */
+#define pr_fmt(fmt) "hw-breakpoint: " fmt
+
+#include <linux/hw_breakpoint.h>
+#include <linux/kprobes.h>
+#include <linux/perf_event.h>
+
+#include <asm/hw_breakpoint.h>
+
+/* Breakpoint currently in use for each BRP. */
+static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[LOONGARCH_MAX_BRP]);
+
+/* Watchpoint currently in use for each WRP. */
+static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[LOONGARCH_MAX_WRP]);
+
+int hw_breakpoint_slots(int type)
+{
+	/*
+	 * We can be called early, so don't rely on
+	 * our static variables being initialised.
+	 */
+	switch (type) {
+	case TYPE_INST:
+		return get_num_brps();
+	case TYPE_DATA:
+		return get_num_wrps();
+	default:
+		pr_warn("unknown slot type: %d\n", type);
+		return 0;
+	}
+}
+
+#define READ_WB_REG_CASE(OFF, N, REG, T, VAL)		\
+	case (OFF + N):					\
+		LOONGARCH_CSR_WATCH_READ(N, REG, T, VAL);	\
+		break
+
+#define WRITE_WB_REG_CASE(OFF, N, REG, T, VAL)		\
+	case (OFF + N):					\
+		LOONGARCH_CSR_WATCH_WRITE(N, REG, T, VAL);	\
+		break
+
+#define GEN_READ_WB_REG_CASES(OFF, REG, T, VAL)		\
+	READ_WB_REG_CASE(OFF, 0, REG, T, VAL);		\
+	READ_WB_REG_CASE(OFF, 1, REG, T, VAL);		\
+	READ_WB_REG_CASE(OFF, 2, REG, T, VAL);		\
+	READ_WB_REG_CASE(OFF, 3, REG, T, VAL);		\
+	READ_WB_REG_CASE(OFF, 4, REG, T, VAL);		\
+	READ_WB_REG_CASE(OFF, 5, REG, T, VAL);		\
+	READ_WB_REG_CASE(OFF, 6, REG, T, VAL);		\
+	READ_WB_REG_CASE(OFF, 7, REG, T, VAL);
+
+#define GEN_WRITE_WB_REG_CASES(OFF, REG, T, VAL)	\
+	WRITE_WB_REG_CASE(OFF, 0, REG, T, VAL);		\
+	WRITE_WB_REG_CASE(OFF, 1, REG, T, VAL);		\
+	WRITE_WB_REG_CASE(OFF, 2, REG, T, VAL);		\
+	WRITE_WB_REG_CASE(OFF, 3, REG, T, VAL);		\
+	WRITE_WB_REG_CASE(OFF, 4, REG, T, VAL);		\
+	WRITE_WB_REG_CASE(OFF, 5, REG, T, VAL);		\
+	WRITE_WB_REG_CASE(OFF, 6, REG, T, VAL);		\
+	WRITE_WB_REG_CASE(OFF, 7, REG, T, VAL);
+
+static u64 read_wb_reg(int reg, int n, int t)
+{
+	u64 val = 0;
+
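+	/* reg is a CSR_CFG_* base; n selects the breakpoint index within it. */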
+	switch (reg + n) {
+	GEN_READ_WB_REG_CASES(CSR_CFG_ADDR, ADDR, t, val);
+	GEN_READ_WB_REG_CASES(CSR_CFG_MASK, MASK, t, val);
+	GEN_READ_WB_REG_CASES(CSR_CFG_CTRL, CTRL, t, val);
+	GEN_READ_WB_REG_CASES(CSR_CFG_ASID, ASID, t, val);
+	default:
+		pr_warn("Attempt to read from unknown breakpoint register %d\n", n);
+	}
+
+	return val;
+}
+NOKPROBE_SYMBOL(read_wb_reg);
+
+static void write_wb_reg(int reg, int n, int t, u64 val)
+{
+	switch (reg + n) {
+	GEN_WRITE_WB_REG_CASES(CSR_CFG_ADDR, ADDR, t, val);
+	GEN_WRITE_WB_REG_CASES(CSR_CFG_MASK, MASK, t, val);
+	GEN_WRITE_WB_REG_CASES(CSR_CFG_CTRL, CTRL, t, val);
+	GEN_WRITE_WB_REG_CASES(CSR_CFG_ASID, ASID, t, val);
+	default:
+		pr_warn("Attempt to write to unknown breakpoint register %d\n", n);
+	}
+}
+NOKPROBE_SYMBOL(write_wb_reg);
+
+enum hw_breakpoint_ops {
+	HW_BREAKPOINT_INSTALL,
+	HW_BREAKPOINT_UNINSTALL,
+};
+
+/*
+ * hw_breakpoint_slot_setup - Find and setup a perf slot according to operations
+ *
+ * @slots: pointer to array of slots
+ * @max_slots: max number of slots
+ * @bp: perf_event to setup
+ * @ops: operation to be carried out on the slot
+ *
+ * Return:
+ *	slot index on success
+ *	-ENOSPC if no slot is available/matches
+ *	-EINVAL on wrong operations parameter
+ */
+static int hw_breakpoint_slot_setup(struct perf_event **slots, int max_slots,
+				    struct perf_event *bp, enum hw_breakpoint_ops ops)
+{
+	int i;
+	struct perf_event **slot;
+
+	for (i = 0; i < max_slots; ++i) {
+		slot = &slots[i];
+		switch (ops) {
+		case HW_BREAKPOINT_INSTALL:
+			if (!*slot) {
+				*slot = bp;
+				return i;
+			}
+			break;
+		case HW_BREAKPOINT_UNINSTALL:
+			if (*slot == bp) {
+				*slot = NULL;
+				return i;
+			}
+			break;
+		default:
+			pr_warn_once("Unhandled hw breakpoint ops %d\n", ops);
+			return -EINVAL;
+		}
+	}
+
+	return -ENOSPC;
+}
+
+void ptrace_hw_copy_thread(struct task_struct *tsk)
+{
+	memset(tsk->thread.hbp_break, 0, sizeof(tsk->thread.hbp_break));
+	memset(tsk->thread.hbp_watch, 0, sizeof(tsk->thread.hbp_watch));
+}
+
+/*
+ * Unregister breakpoints from this task and reset the pointers in the thread_struct.
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
+{
+	int i;
+	struct thread_struct *t = &tsk->thread;
+
+	for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+		if (t->hbp_break[i]) {
+			unregister_hw_breakpoint(t->hbp_break[i]);
+			t->hbp_break[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < LOONGARCH_MAX_WRP; i++) {
+		if (t->hbp_watch[i]) {
+			unregister_hw_breakpoint(t->hbp_watch[i]);
+			t->hbp_watch[i] = NULL;
+		}
+	}
+}
+
+static int hw_breakpoint_control(struct perf_event *bp,
+				 enum hw_breakpoint_ops ops)
+{
+	u32 ctrl;
+	int i, max_slots, enable;
+	struct perf_event **slots;
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+	if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) {
+		/* Breakpoint */
+		slots = this_cpu_ptr(bp_on_reg);
+		max_slots = boot_cpu_data.watch_ireg_count;
+	} else {
+		/* Watchpoint */
+		slots = this_cpu_ptr(wp_on_reg);
+		max_slots = boot_cpu_data.watch_dreg_count;
+	}
+
+	i = hw_breakpoint_slot_setup(slots, max_slots, bp, ops);
+
+	if (WARN_ONCE(i < 0, "Can't find any breakpoint slot"))
+		return i;
+
+	switch (ops) {
+	case HW_BREAKPOINT_INSTALL:
+		/* Set the FWPnCFG/MWPnCFG 1~4 register. */
+		write_wb_reg(CSR_CFG_ADDR, i, 0, info->address);
+		write_wb_reg(CSR_CFG_ADDR, i, 1, info->address);
+		write_wb_reg(CSR_CFG_MASK, i, 0, info->mask);
+		write_wb_reg(CSR_CFG_MASK, i, 1, info->mask);
+		write_wb_reg(CSR_CFG_ASID, i, 0, 0);
+		write_wb_reg(CSR_CFG_ASID, i, 1, 0);
+		if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) {
+			write_wb_reg(CSR_CFG_CTRL, i, 0, CTRL_PLV_ENABLE);
+		} else {
+			ctrl = encode_ctrl_reg(info->ctrl);
+			write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE |
+				     1 << MWPnCFG3_LoadEn | 1 << MWPnCFG3_StoreEn);
+		}
+		enable = csr_read64(LOONGARCH_CSR_CRMD);
+		csr_write64(CSR_CRMD_WE | enable, LOONGARCH_CSR_CRMD);
+		break;
+	case HW_BREAKPOINT_UNINSTALL:
+		/* Reset the FWPnCFG/MWPnCFG 1~4 register. */
+		write_wb_reg(CSR_CFG_ADDR, i, 0, 0);
+		write_wb_reg(CSR_CFG_ADDR, i, 1, 0);
+		write_wb_reg(CSR_CFG_MASK, i, 0, 0);
+		write_wb_reg(CSR_CFG_MASK, i, 1, 0);
+		write_wb_reg(CSR_CFG_CTRL, i, 0, 0);
+		write_wb_reg(CSR_CFG_CTRL, i, 1, 0);
+		write_wb_reg(CSR_CFG_ASID, i, 0, 0);
+		write_wb_reg(CSR_CFG_ASID, i, 1, 0);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Install a perf counter breakpoint.
+ */
+int arch_install_hw_breakpoint(struct perf_event *bp)
+{
+	return hw_breakpoint_control(bp, HW_BREAKPOINT_INSTALL);
+}
+
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
+{
+	hw_breakpoint_control(bp, HW_BREAKPOINT_UNINSTALL);
+}
+
+static int get_hbp_len(u8 hbp_len)
+{
+	unsigned int len_in_bytes = 0;
+
+	switch (hbp_len) {
+	case LOONGARCH_BREAKPOINT_LEN_1:
+		len_in_bytes = 1;
+		break;
+	case LOONGARCH_BREAKPOINT_LEN_2:
+		len_in_bytes = 2;
+		break;
+	case LOONGARCH_BREAKPOINT_LEN_4:
+		len_in_bytes = 4;
+		break;
+	case LOONGARCH_BREAKPOINT_LEN_8:
+		len_in_bytes = 8;
+		break;
+	}
+
+	return len_in_bytes;
+}
+
+/*
+ * Check whether bp virtual address is in kernel space.
+ */
+int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
+{
+	unsigned int len;
+	unsigned long va;
+
+	va = hw->address;
+	len = get_hbp_len(hw->ctrl.len);
+
+	return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
+}
+
+/*
+ * Extract generic type and length encodings from an arch_hw_breakpoint_ctrl.
+ * Hopefully this will disappear when ptrace can bypass the conversion
+ * to generic breakpoint descriptions.
+ */
+int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
+			   int *gen_len, int *gen_type, int *offset)
+{
+	/* Type */
+	switch (ctrl.type) {
+	case LOONGARCH_BREAKPOINT_EXECUTE:
+		*gen_type = HW_BREAKPOINT_X;
+		break;
+	case LOONGARCH_BREAKPOINT_LOAD:
+		*gen_type = HW_BREAKPOINT_R;
+		break;
+	case LOONGARCH_BREAKPOINT_STORE:
+		*gen_type = HW_BREAKPOINT_W;
+		break;
+	case LOONGARCH_BREAKPOINT_LOAD | LOONGARCH_BREAKPOINT_STORE:
+		*gen_type = HW_BREAKPOINT_RW;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (!ctrl.len)
+		return -EINVAL;
+
+	*offset = __ffs(ctrl.len);
+
+	/* Len */
+	switch (ctrl.len) {
+	case LOONGARCH_BREAKPOINT_LEN_1:
+		*gen_len = HW_BREAKPOINT_LEN_1;
+		break;
+	case LOONGARCH_BREAKPOINT_LEN_2:
+		*gen_len = HW_BREAKPOINT_LEN_2;
+		break;
+	case LOONGARCH_BREAKPOINT_LEN_4:
+		*gen_len = HW_BREAKPOINT_LEN_4;
+		break;
+	case LOONGARCH_BREAKPOINT_LEN_8:
+		*gen_len = HW_BREAKPOINT_LEN_8;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Construct an arch_hw_breakpoint from a perf_event.
+ */
+static int arch_build_bp_info(struct perf_event *bp,
+			      const struct perf_event_attr *attr,
+			      struct arch_hw_breakpoint *hw)
+{
+	/* Type */
+	switch (attr->bp_type) {
+	case HW_BREAKPOINT_X:
+		hw->ctrl.type = LOONGARCH_BREAKPOINT_EXECUTE;
+		break;
+	case HW_BREAKPOINT_R:
+		hw->ctrl.type = LOONGARCH_BREAKPOINT_LOAD;
+		break;
+	case HW_BREAKPOINT_W:
+		hw->ctrl.type = LOONGARCH_BREAKPOINT_STORE;
+		break;
+	case HW_BREAKPOINT_RW:
+		hw->ctrl.type = LOONGARCH_BREAKPOINT_LOAD | LOONGARCH_BREAKPOINT_STORE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Len */
+	switch (attr->bp_len) {
+	case HW_BREAKPOINT_LEN_1:
+		hw->ctrl.len = LOONGARCH_BREAKPOINT_LEN_1;
+		break;
+	case HW_BREAKPOINT_LEN_2:
+		hw->ctrl.len = LOONGARCH_BREAKPOINT_LEN_2;
+		break;
+	case HW_BREAKPOINT_LEN_4:
+		hw->ctrl.len = LOONGARCH_BREAKPOINT_LEN_4;
+		break;
+	case HW_BREAKPOINT_LEN_8:
+		hw->ctrl.len = LOONGARCH_BREAKPOINT_LEN_8;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Address */
+	hw->address = attr->bp_addr;
+
+	return 0;
+}
+
+/*
+ * Validate the arch-specific HW Breakpoint register settings.
+ */
+int hw_breakpoint_arch_parse(struct perf_event *bp,
+			     const struct perf_event_attr *attr,
+			     struct arch_hw_breakpoint *hw)
+{
+	int ret;
+	u64 alignment_mask, offset;
+
+	/* Build the arch_hw_breakpoint. */
+	ret = arch_build_bp_info(bp, attr, hw);
+	if (ret)
+		return ret;
+
+	/* EXECUTE breakpoints are 4-byte aligned; watchpoints cover up to 8 bytes. */
+	if (hw->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE)
+		alignment_mask = 0x3;
+	else
+		alignment_mask = 0x7;
+	offset = hw->address & alignment_mask;
+
+	hw->address &= ~alignment_mask;
+	hw->ctrl.len <<= offset;
+
+	return 0;
+}
+
+static void update_bp_registers(struct pt_regs *regs, int enable, int type)
+{
+	u32 ctrl;
+	int i, max_slots;
+	struct perf_event **slots;
+	struct arch_hw_breakpoint *info;
+
+	switch (type) {
+	case 0:
+		slots = this_cpu_ptr(bp_on_reg);
+		max_slots = boot_cpu_data.watch_ireg_count;
+		break;
+	case 1:
+		slots = this_cpu_ptr(wp_on_reg);
+		max_slots = boot_cpu_data.watch_dreg_count;
+		break;
+	default:
+		return;
+	}
+
+	for (i = 0; i < max_slots; ++i) {
+		if (!slots[i])
+			continue;
+
+		info = counter_arch_bp(slots[i]);
+		if (enable) {
+			if ((info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) && (type == 0)) {
+				write_wb_reg(CSR_CFG_CTRL, i, 0, CTRL_PLV_ENABLE);
+			} else {
+				ctrl = read_wb_reg(CSR_CFG_CTRL, i, 1);
+				if (info->ctrl.type == LOONGARCH_BREAKPOINT_LOAD)
+					ctrl |= 0x1 << MWPnCFG3_LoadEn;
+				if (info->ctrl.type == LOONGARCH_BREAKPOINT_STORE)
+					ctrl |= 0x1 << MWPnCFG3_StoreEn;
+				write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl);
+			}
+			regs->csr_prmd |= CSR_PRMD_PWE;
+		} else {
+			if ((info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) && (type == 0)) {
+				write_wb_reg(CSR_CFG_CTRL, i, 0, 0);
+			} else {
+				ctrl = read_wb_reg(CSR_CFG_CTRL, i, 1);
+				if (info->ctrl.type == LOONGARCH_BREAKPOINT_LOAD)
+					ctrl &= ~0x1 << MWPnCFG3_LoadEn;
+				if (info->ctrl.type == LOONGARCH_BREAKPOINT_STORE)
+					ctrl &= ~0x1 << MWPnCFG3_StoreEn;
+				write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl);
+			}
+			regs->csr_prmd &= ~CSR_PRMD_PWE;
+		}
+	}
+}
+NOKPROBE_SYMBOL(update_bp_registers);
+
+/*
+ * Debug exception handlers.
+ */
+void breakpoint_handler(struct pt_regs *regs)
+{
+	int i;
+	struct perf_event *bp, **slots;
+
+	slots = this_cpu_ptr(bp_on_reg);
+
+	for (i = 0; i < boot_cpu_data.watch_ireg_count; ++i) {
+		bp = slots[i];
+		if (bp == NULL)
+			continue;
+		perf_bp_event(bp, regs);
+	}
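+	/* Disable all ibreakpoints; hw_breakpoint_thread_switch() re-arms them. */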
+	update_bp_registers(regs, 0, 0);
+}
+NOKPROBE_SYMBOL(breakpoint_handler);
+
+void watchpoint_handler(struct pt_regs *regs)
+{
+	int i;
+	struct perf_event *wp, **slots;
+
+	slots = this_cpu_ptr(wp_on_reg);
+
+	for (i = 0; i < boot_cpu_data.watch_dreg_count; ++i) {
+		wp = slots[i];
+		if (wp == NULL)
+			continue;
+		perf_bp_event(wp, regs);
+	}
+	update_bp_registers(regs, 0, 1);
+}
+NOKPROBE_SYMBOL(watchpoint_handler);
+
+static int __init arch_hw_breakpoint_init(void)
+{
+	int cpu;
+
+	boot_cpu_data.watch_ireg_count = get_num_brps();
+	boot_cpu_data.watch_dreg_count = get_num_wrps();
+
+	pr_info("Found %d breakpoint and %d watchpoint registers.\n",
+		boot_cpu_data.watch_ireg_count, boot_cpu_data.watch_dreg_count);
+
+	for (cpu = 1; cpu < NR_CPUS; cpu++) {
+		cpu_data[cpu].watch_ireg_count = boot_cpu_data.watch_ireg_count;
+		cpu_data[cpu].watch_dreg_count = boot_cpu_data.watch_dreg_count;
+	}
+
+	return 0;
+}
+arch_initcall(arch_hw_breakpoint_init);
+
+void hw_breakpoint_thread_switch(struct task_struct *next)
+{
+	u64 addr, mask;
+	struct pt_regs *regs = task_pt_regs(next);
+
+	if (test_tsk_thread_flag(next, TIF_SINGLESTEP)) {
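+		/*
+		 * If ERA still matches the single-step breakpoint, set the
+		 * Skip bit so the instruction executes before it re-fires.
+		 */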
+		addr = read_wb_reg(CSR_CFG_ADDR, 0, 0);
+		mask = read_wb_reg(CSR_CFG_MASK, 0, 0);
+		if (!((regs->csr_era ^ addr) & ~mask))
+			csr_write32(CSR_FWPC_SKIP, LOONGARCH_CSR_FWPS);
+		regs->csr_prmd |= CSR_PRMD_PWE;
+	} else {
+		/* Update breakpoints */
+		update_bp_registers(regs, 1, 0);
+		/* Update watchpoints */
+		update_bp_registers(regs, 1, 1);
+	}
+}
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+}
+
+/*
+ * Dummy function to register with die_notifier.
+ */
+int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
+				    unsigned long val, void *data)
+{
+	return NOTIFY_DONE;
+}
diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c
index badc590870423433495616c47e53022e4cf96f35..258ef267cd306fd97a27f8aced0931634b518561 100644
--- a/arch/loongarch/kernel/inst.c
+++ b/arch/loongarch/kernel/inst.c
@@ -10,6 +10,129 @@
 
 static DEFINE_RAW_SPINLOCK(patch_lock);
 
+void simu_pc(struct pt_regs *regs, union loongarch_instruction insn)
+{
+	unsigned long pc = regs->csr_era;
+	unsigned int rd = insn.reg1i20_format.rd;
+	unsigned int imm = insn.reg1i20_format.immediate;
+
+	if (pc & 3) {
+		pr_warn("%s: invalid pc 0x%lx\n", __func__, pc);
+		return;
+	}
+
+	switch (insn.reg1i20_format.opcode) {
+	case pcaddi_op:
+		regs->regs[rd] = pc + sign_extend64(imm << 2, 21);
+		break;
+	case pcaddu12i_op:
+		regs->regs[rd] = pc + sign_extend64(imm << 12, 31);
+		break;
+	case pcaddu18i_op:
+		regs->regs[rd] = pc + sign_extend64(imm << 18, 37);
+		break;
+	case pcalau12i_op:
+		regs->regs[rd] = pc + sign_extend64(imm << 12, 31);
+		regs->regs[rd] &= ~((1 << 12) - 1);
+		break;
+	default:
+		pr_info("%s: unknown opcode\n", __func__);
+		return;
+	}
+
+	regs->csr_era += LOONGARCH_INSN_SIZE;
+}
+
+void simu_branch(struct pt_regs *regs, union loongarch_instruction insn)
+{
+	unsigned int imm, imm_l, imm_h, rd, rj;
+	unsigned long pc = regs->csr_era;
+
+	if (pc & 3) {
+		pr_warn("%s: invalid pc 0x%lx\n", __func__, pc);
+		return;
+	}
+
+	imm_l = insn.reg0i26_format.immediate_l;
+	imm_h = insn.reg0i26_format.immediate_h;
+	switch (insn.reg0i26_format.opcode) {
+	case b_op:
+		regs->csr_era = pc + sign_extend64((imm_h << 16 | imm_l) << 2, 27);
+		return;
+	case bl_op:
+		regs->csr_era = pc + sign_extend64((imm_h << 16 | imm_l) << 2, 27);
+		regs->regs[1] = pc + LOONGARCH_INSN_SIZE;
+		return;
+	}
+
+	imm_l = insn.reg1i21_format.immediate_l;
+	imm_h = insn.reg1i21_format.immediate_h;
+	rj = insn.reg1i21_format.rj;
+	switch (insn.reg1i21_format.opcode) {
+	case beqz_op:
+		if (regs->regs[rj] == 0)
+			regs->csr_era = pc + sign_extend64((imm_h << 16 | imm_l) << 2, 22);
+		else
+			regs->csr_era = pc + LOONGARCH_INSN_SIZE;
+		return;
+	case bnez_op:
+		if (regs->regs[rj] != 0)
+			regs->csr_era = pc + sign_extend64((imm_h << 16 | imm_l) << 2, 22);
+		else
+			regs->csr_era = pc + LOONGARCH_INSN_SIZE;
+		return;
+	}
+
+	imm = insn.reg2i16_format.immediate;
+	rj = insn.reg2i16_format.rj;
+	rd = insn.reg2i16_format.rd;
+	switch (insn.reg2i16_format.opcode) {
+	case beq_op:
+		if (regs->regs[rj] == regs->regs[rd])
+			regs->csr_era = pc + sign_extend64(imm << 2, 17);
+		else
+			regs->csr_era = pc + LOONGARCH_INSN_SIZE;
+		break;
+	case bne_op:
+		if (regs->regs[rj] != regs->regs[rd])
+			regs->csr_era = pc + sign_extend64(imm << 2, 17);
+		else
+			regs->csr_era = pc + LOONGARCH_INSN_SIZE;
+		break;
+	case blt_op:
+		if ((long)regs->regs[rj] < (long)regs->regs[rd])
+			regs->csr_era = pc + sign_extend64(imm << 2, 17);
+		else
+			regs->csr_era = pc + LOONGARCH_INSN_SIZE;
+		break;
+	case bge_op:
+		if ((long)regs->regs[rj] >= (long)regs->regs[rd])
+			regs->csr_era = pc + sign_extend64(imm << 2, 17);
+		else
+			regs->csr_era = pc + LOONGARCH_INSN_SIZE;
+		break;
+	case bltu_op:
+		if (regs->regs[rj] < regs->regs[rd])
+			regs->csr_era = pc + sign_extend64(imm << 2, 17);
+		else
+			regs->csr_era = pc + LOONGARCH_INSN_SIZE;
+		break;
+	case bgeu_op:
+		if (regs->regs[rj] >= regs->regs[rd])
+			regs->csr_era = pc + sign_extend64(imm << 2, 17);
+		else
+			regs->csr_era = pc + LOONGARCH_INSN_SIZE;
+		break;
+	case jirl_op:
+		regs->csr_era = regs->regs[rj] + sign_extend64(imm << 2, 17);
+		regs->regs[rd] = pc + LOONGARCH_INSN_SIZE;
+		break;
+	default:
+		pr_info("%s: unknown opcode\n", __func__);
+		return;
+	}
+}
+
 int larch_insn_read(void *addr, u32 *insnp)
 {
 	int ret;
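
The simulation helpers above follow the PC-relative encodings: a 20-bit immediate is shifted into place and then sign-extended at the bit index of its top bit. A small host-side sketch of the same arithmetic, with sign_extend64() reimplemented here for illustration:

/* pc_sim.c - check the pcaddu12i/pcalau12i arithmetic used by simu_pc() */
#include <stdint.h>
#include <stdio.h>

/* same semantics as the kernel's sign_extend64(value, index) */
static int64_t sign_extend64(uint64_t value, int index)
{
	int shift = 63 - index;

	return (int64_t)(value << shift) >> shift;
}

int main(void)
{
	uint64_t pc = 0x9000000000200000ull;
	uint32_t imm = 0xfffff;		/* all-ones 20-bit immediate => negative */

	/* pcaddu12i rd, imm: rd = pc + SignExtend(imm << 12, bit 31) */
	uint64_t rd = pc + (uint64_t)sign_extend64((uint64_t)imm << 12, 31);
	printf("pcaddu12i: 0x%llx\n", (unsigned long long)rd);

	/* pcalau12i additionally clears the low 12 bits of the result */
	printf("pcalau12i: 0x%llx\n", (unsigned long long)(rd & ~0xfffull));
	return 0;
}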
diff --git a/arch/loongarch/kernel/kprobes.c b/arch/loongarch/kernel/kprobes.c
new file mode 100644
index 0000000000000000000000000000000000000000..56c8c4b09a4299652e10cf65c9073a016a93891f
--- /dev/null
+++ b/arch/loongarch/kernel/kprobes.c
@@ -0,0 +1,406 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kdebug.h>
+#include <linux/kprobes.h>
+#include <linux/preempt.h>
+#include <asm/break.h>
+
+static const union loongarch_instruction breakpoint_insn = {
+	.reg0i15_format = {
+		.opcode = break_op,
+		.immediate = BRK_KPROBE_BP,
+	}
+};
+
+static const union loongarch_instruction singlestep_insn = {
+	.reg0i15_format = {
+		.opcode = break_op,
+		.immediate = BRK_KPROBE_SSTEPBP,
+	}
+};
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe);
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+static bool insns_not_supported(union loongarch_instruction insn)
+{
+	switch (insn.reg2i14_format.opcode) {
+	case llw_op:
+	case lld_op:
+	case scw_op:
+	case scd_op:
+		pr_notice("kprobe: ll and sc instructions are not supported\n");
+		return true;
+	}
+
+	switch (insn.reg1i21_format.opcode) {
+	case bceqz_op:
+		pr_notice("kprobe: bceqz and bcnez instructions are not supported\n");
+		return true;
+	}
+
+	return false;
+}
+NOKPROBE_SYMBOL(insns_not_supported);
+
+static bool insns_need_simulation(struct kprobe *p)
+{
+	if (is_pc_ins(&p->opcode))
+		return true;
+
+	if (is_branch_ins(&p->opcode))
+		return true;
+
+	return false;
+}
+NOKPROBE_SYMBOL(insns_need_simulation);
+
+static void arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
+{
+	if (is_pc_ins(&p->opcode))
+		simu_pc(regs, p->opcode);
+	else if (is_branch_ins(&p->opcode))
+		simu_branch(regs, p->opcode);
+}
+NOKPROBE_SYMBOL(arch_simulate_insn);
+
+static void arch_prepare_ss_slot(struct kprobe *p)
+{
+	p->ainsn.insn[0] = *p->addr;
+	p->ainsn.insn[1] = singlestep_insn;
+	p->ainsn.restore = (unsigned long)p->addr + LOONGARCH_INSN_SIZE;
+}
+NOKPROBE_SYMBOL(arch_prepare_ss_slot);
+
+static void arch_prepare_simulate(struct kprobe *p)
+{
+	p->ainsn.restore = 0;
+}
+NOKPROBE_SYMBOL(arch_prepare_simulate);
+
+int arch_prepare_kprobe(struct kprobe *p)
+{
+	if ((unsigned long)p->addr & 0x3)
+		return -EILSEQ;
+
+	/* copy instruction */
+	p->opcode = *p->addr;
+
+	/* decode instruction */
+	if (insns_not_supported(p->opcode))
+		return -EINVAL;
+
+	if (insns_need_simulation(p)) {
+		p->ainsn.insn = NULL;
+	} else {
+		p->ainsn.insn = get_insn_slot();
+		if (!p->ainsn.insn)
+			return -ENOMEM;
+	}
+
+	/* prepare the instruction */
+	if (p->ainsn.insn)
+		arch_prepare_ss_slot(p);
+	else
+		arch_prepare_simulate(p);
+
+	return 0;
+}
+NOKPROBE_SYMBOL(arch_prepare_kprobe);
+
+/* Install breakpoint in text */
+void arch_arm_kprobe(struct kprobe *p)
+{
+	*p->addr = breakpoint_insn;
+	flush_insn_slot(p);
+}
+NOKPROBE_SYMBOL(arch_arm_kprobe);
+
+/* Remove breakpoint from text */
+void arch_disarm_kprobe(struct kprobe *p)
+{
+	*p->addr = p->opcode;
+	flush_insn_slot(p);
+}
+NOKPROBE_SYMBOL(arch_disarm_kprobe);
+
+void arch_remove_kprobe(struct kprobe *p)
+{
+	if (p->ainsn.insn) {
+		free_insn_slot(p->ainsn.insn, 0);
+		p->ainsn.insn = NULL;
+	}
+}
+NOKPROBE_SYMBOL(arch_remove_kprobe);
+
+static void save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	kcb->prev_kprobe.kp = kprobe_running();
+	kcb->prev_kprobe.status = kcb->kprobe_status;
+}
+NOKPROBE_SYMBOL(save_previous_kprobe);
+
+static void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
+	kcb->kprobe_status = kcb->prev_kprobe.status;
+}
+NOKPROBE_SYMBOL(restore_previous_kprobe);
+
+static void set_current_kprobe(struct kprobe *p)
+{
+	__this_cpu_write(current_kprobe, p);
+}
+NOKPROBE_SYMBOL(set_current_kprobe);
+
+/*
+ * Interrupts need to be disabled before single-step mode is set,
+ * and not reenabled until after single-step mode ends.
+ * Without disabling interrupts on the local CPU, an interrupt could
+ * arrive between the exception return and the start of the out-of-line
+ * single step, and we would then wrongly single-step into the interrupt
+ * handler.
+ */
+static void save_local_irqflag(struct kprobe_ctlblk *kcb,
+			       struct pt_regs *regs)
+{
+	kcb->saved_status = regs->csr_prmd;
+	regs->csr_prmd &= ~CSR_PRMD_PIE;
+}
+NOKPROBE_SYMBOL(save_local_irqflag);
+
+static void restore_local_irqflag(struct kprobe_ctlblk *kcb,
+				  struct pt_regs *regs)
+{
+	regs->csr_prmd = kcb->saved_status;
+}
+NOKPROBE_SYMBOL(restore_local_irqflag);
+
+static void post_kprobe_handler(struct kprobe *cur, struct kprobe_ctlblk *kcb,
+				struct pt_regs *regs)
+{
+	/* return addr restore if non-branching insn */
+	if (cur->ainsn.restore != 0)
+		instruction_pointer_set(regs, cur->ainsn.restore);
+
+	/* restore back original saved kprobe variables and continue */
+	if (kcb->kprobe_status == KPROBE_REENTER) {
+		restore_previous_kprobe(kcb);
+		preempt_enable_no_resched();
+		return;
+	}
+
+	/*
+	 * update the kcb status even if the cur->post_handler is
+	 * not set because reset_current_kprobe() doesn't update kcb.
+	 */
+	kcb->kprobe_status = KPROBE_HIT_SSDONE;
+	if (cur->post_handler)
+		cur->post_handler(cur, regs, 0);
+
+	reset_current_kprobe();
+	preempt_enable_no_resched();
+}
+NOKPROBE_SYMBOL(post_kprobe_handler);
+
+static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
+			     struct kprobe_ctlblk *kcb, int reenter)
+{
+	if (reenter) {
+		save_previous_kprobe(kcb);
+		set_current_kprobe(p);
+		kcb->kprobe_status = KPROBE_REENTER;
+	} else {
+		kcb->kprobe_status = KPROBE_HIT_SS;
+	}
+
+	if (p->ainsn.insn) {
+		/* IRQs and single stepping do not mix well */
+		save_local_irqflag(kcb, regs);
+		/* set ip register to prepare for single stepping */
+		regs->csr_era = (unsigned long)p->ainsn.insn;
+	} else {
+		/* simulate single stepping */
+		arch_simulate_insn(p, regs);
+		/* now go for post processing */
+		post_kprobe_handler(p, kcb, regs);
+	}
+}
+NOKPROBE_SYMBOL(setup_singlestep);
+
+static bool reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
+			   struct kprobe_ctlblk *kcb)
+{
+	switch (kcb->kprobe_status) {
+	case KPROBE_HIT_SS:
+	case KPROBE_HIT_SSDONE:
+	case KPROBE_HIT_ACTIVE:
+		kprobes_inc_nmissed_count(p);
+		setup_singlestep(p, regs, kcb, 1);
+		break;
+	case KPROBE_REENTER:
+		pr_warn("Failed to recover from reentered kprobes.\n");
+		dump_kprobe(p);
+		WARN_ON_ONCE(1);
+		break;
+	default:
+		WARN_ON(1);
+		return false;
+	}
+
+	return true;
+}
+NOKPROBE_SYMBOL(reenter_kprobe);
+
+bool kprobe_breakpoint_handler(struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb;
+	struct kprobe *p, *cur_kprobe;
+	kprobe_opcode_t *addr = (kprobe_opcode_t *)regs->csr_era;
+
+	/*
+	 * We don't want to be preempted for the entire
+	 * duration of kprobe processing.
+	 */
+	preempt_disable();
+	kcb = get_kprobe_ctlblk();
+	cur_kprobe = kprobe_running();
+
+	p = get_kprobe(addr);
+	if (p) {
+		if (cur_kprobe) {
+			if (reenter_kprobe(p, regs, kcb))
+				return true;
+		} else {
+			/* Probe hit */
+			set_current_kprobe(p);
+			kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+			/*
+			 * If we have no pre-handler or it returned 0, we
+			 * continue with normal processing. If we have a
+			 * pre-handler and it returned non-zero, it has
+			 * modified the execution path, so there is no need
+			 * to single-step; just reset the current kprobe and
+			 * exit.
+			 *
+			 * Note that the pre-handler itself may hit a
+			 * breakpoint and single-step before returning.
+			 */
+			if (!p->pre_handler || !p->pre_handler(p, regs)) {
+				setup_singlestep(p, regs, kcb, 0);
+			} else {
+				reset_current_kprobe();
+				preempt_enable_no_resched();
+			}
+			return true;
+		}
+	}
+
+	if (addr->word != breakpoint_insn.word) {
+		/*
+		 * The breakpoint instruction was removed right
+		 * after we hit it.  Another cpu has removed
+		 * either a probepoint or a debugger breakpoint
+		 * at this address.  In either case, no further
+		 * handling of this interrupt is appropriate.
+		 * Return to the original instruction and continue.
+		 */
+		regs->csr_era = (unsigned long)addr;
+		preempt_enable_no_resched();
+		return true;
+	}
+
+	preempt_enable_no_resched();
+	return false;
+}
+NOKPROBE_SYMBOL(kprobe_breakpoint_handler);
+
+bool kprobe_singlestep_handler(struct pt_regs *regs)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long addr = instruction_pointer(regs);
+
+	if (cur && (kcb->kprobe_status & (KPROBE_HIT_SS | KPROBE_REENTER)) &&
+	    ((unsigned long)&cur->ainsn.insn[1] == addr)) {
+		restore_local_irqflag(kcb, regs);
+		post_kprobe_handler(cur, kcb, regs);
+		return true;
+	}
+
+	preempt_enable_no_resched();
+	return false;
+}
+NOKPROBE_SYMBOL(kprobe_singlestep_handler);
+
+bool kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	switch (kcb->kprobe_status) {
+	case KPROBE_HIT_SS:
+	case KPROBE_REENTER:
+		/*
+		 * We are here because the instruction being single
+		 * stepped caused a page fault. We reset the current
+		 * kprobe and the ip points back to the probe address
+		 * and allow the page fault handler to continue as a
+		 * normal page fault.
+		 */
+		regs->csr_era = (unsigned long)cur->addr;
+		WARN_ON_ONCE(!instruction_pointer(regs));
+
+		if (kcb->kprobe_status == KPROBE_REENTER) {
+			restore_previous_kprobe(kcb);
+		} else {
+			restore_local_irqflag(kcb, regs);
+			reset_current_kprobe();
+		}
+		preempt_enable_no_resched();
+		break;
+	}
+	return false;
+}
+NOKPROBE_SYMBOL(kprobe_fault_handler);
+
+/*
+ * Provide a blacklist of symbols identifying ranges which cannot be kprobed.
+ * This blacklist is exposed to userspace via debugfs (kprobes/blacklist).
+ */
+int __init arch_populate_kprobe_blacklist(void)
+{
+	return kprobe_add_area_blacklist((unsigned long)__irqentry_text_start,
+					 (unsigned long)__irqentry_text_end);
+}
+
+int __init arch_init_kprobes(void)
+{
+	return 0;
+}
+
+/* ASM function that handles the kretprobes must not be probed */
+NOKPROBE_SYMBOL(__kretprobe_trampoline);
+
+/* Called from __kretprobe_trampoline */
+void __used *trampoline_probe_handler(struct pt_regs *regs)
+{
+	return (void *)kretprobe_trampoline_handler(regs, NULL);
+}
+NOKPROBE_SYMBOL(trampoline_probe_handler);
+
+void arch_prepare_kretprobe(struct kretprobe_instance *ri,
+			    struct pt_regs *regs)
+{
+	ri->ret_addr = (kprobe_opcode_t *)regs->regs[1];
+	ri->fp = NULL;
+
+	/* Replace the return addr with trampoline addr */
+	regs->regs[1] = (unsigned long)&__kretprobe_trampoline;
+}
+NOKPROBE_SYMBOL(arch_prepare_kretprobe);
+
+int arch_trampoline_kprobe(struct kprobe *p)
+{
+	return 0;
+}
+NOKPROBE_SYMBOL(arch_trampoline_kprobe);
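
A kprobe port like the one above is easiest to smoke-test with a tiny module. The following sketch is illustrative, not part of this patch (the probed symbol is arbitrary); it relies only on the generic kprobes API that this file now backs:

/* kp_demo.c - minimal kprobe module; probed symbol is illustrative */
#include <linux/module.h>
#include <linux/kprobes.h>

static int kp_pre(struct kprobe *p, struct pt_regs *regs)
{
	/* csr_era is the probed address while the pre-handler runs */
	pr_info("kprobe: hit %ps, era=0x%lx\n", p->addr, regs->csr_era);
	return 0;	/* 0: continue via single-step or simulation */
}

static struct kprobe kp = {
	.symbol_name	= "kernel_clone",	/* illustrative target */
	.pre_handler	= kp_pre,
};

static int __init kp_demo_init(void)
{
	return register_kprobe(&kp);
}

static void __exit kp_demo_exit(void)
{
	unregister_kprobe(&kp);
}

module_init(kp_demo_init);
module_exit(kp_demo_exit);
MODULE_LICENSE("GPL");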
diff --git a/arch/loongarch/kernel/kprobes_trampoline.S b/arch/loongarch/kernel/kprobes_trampoline.S
new file mode 100644
index 0000000000000000000000000000000000000000..af94b0d213fa950083e7d9375bd5b402ff1db20d
--- /dev/null
+++ b/arch/loongarch/kernel/kprobes_trampoline.S
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#include <linux/linkage.h>
+#include <asm/stackframe.h>
+
+	.text
+
+	.macro save_all_base_regs
+	cfi_st  ra, PT_R1
+	cfi_st	tp, PT_R2
+	cfi_st	a0, PT_R4
+	cfi_st	a1, PT_R5
+	cfi_st	a2, PT_R6
+	cfi_st	a3, PT_R7
+	cfi_st	a4, PT_R8
+	cfi_st	a5, PT_R9
+	cfi_st	a6, PT_R10
+	cfi_st	a7, PT_R11
+	cfi_st	t0, PT_R12
+	cfi_st	t1, PT_R13
+	cfi_st	t2, PT_R14
+	cfi_st	t3, PT_R15
+	cfi_st	t4, PT_R16
+	cfi_st	t5, PT_R17
+	cfi_st	t6, PT_R18
+	cfi_st	t7, PT_R19
+	cfi_st	t8, PT_R20
+	cfi_st	u0, PT_R21
+	cfi_st	fp, PT_R22
+	cfi_st	s0, PT_R23
+	cfi_st	s1, PT_R24
+	cfi_st	s2, PT_R25
+	cfi_st	s3, PT_R26
+	cfi_st	s4, PT_R27
+	cfi_st	s5, PT_R28
+	cfi_st	s6, PT_R29
+	cfi_st	s7, PT_R30
+	cfi_st	s8, PT_R31
+	csrrd	t0, LOONGARCH_CSR_CRMD
+	andi	t0, t0, 0x7 /* extract bit[1:0] PLV, bit[2] IE */
+	LONG_S	t0, sp, PT_CRMD
+	.endm
+
+	.macro restore_all_base_regs
+	cfi_ld	tp, PT_R2
+	cfi_ld	a0, PT_R4
+	cfi_ld	a1, PT_R5
+	cfi_ld	a2, PT_R6
+	cfi_ld	a3, PT_R7
+	cfi_ld	a4, PT_R8
+	cfi_ld	a5, PT_R9
+	cfi_ld	a6, PT_R10
+	cfi_ld	a7, PT_R11
+	cfi_ld	t0, PT_R12
+	cfi_ld	t1, PT_R13
+	cfi_ld	t2, PT_R14
+	cfi_ld	t3, PT_R15
+	cfi_ld	t4, PT_R16
+	cfi_ld	t5, PT_R17
+	cfi_ld	t6, PT_R18
+	cfi_ld	t7, PT_R19
+	cfi_ld	t8, PT_R20
+	cfi_ld	u0, PT_R21
+	cfi_ld	fp, PT_R22
+	cfi_ld	s0, PT_R23
+	cfi_ld	s1, PT_R24
+	cfi_ld	s2, PT_R25
+	cfi_ld	s3, PT_R26
+	cfi_ld	s4, PT_R27
+	cfi_ld	s5, PT_R28
+	cfi_ld	s6, PT_R29
+	cfi_ld	s7, PT_R30
+	cfi_ld	s8, PT_R31
+	LONG_L  t0, sp, PT_CRMD
+	li.d	t1, 0x7 /* mask bit[1:0] PLV, bit[2] IE */
+	csrxchg t0, t1, LOONGARCH_CSR_CRMD
+	.endm
+
+SYM_CODE_START(__kretprobe_trampoline)
+	addi.d	sp, sp, -PT_SIZE
+	save_all_base_regs
+
+	addi.d	t0, sp, PT_SIZE
+	LONG_S	t0, sp, PT_R3
+
+	move a0, sp /* pt_regs */
+
+	bl trampoline_probe_handler
+
+	/* use the result as the return-address */
+	move ra, a0
+
+	restore_all_base_regs
+	addi.d	sp, sp, PT_SIZE
+
+	jr ra
+SYM_CODE_END(__kretprobe_trampoline)
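
The trampoline above is what makes kretprobes work: arch_prepare_kretprobe() swaps the return address in $ra for __kretprobe_trampoline, which saves the register file, recovers the real return address via trampoline_probe_handler(), and returns through it. A companion sketch of a kretprobe user (illustrative, not part of this patch; the probed symbol is arbitrary):

/* krp_demo.c - minimal kretprobe module; probed symbol is illustrative */
#include <linux/module.h>
#include <linux/kprobes.h>

static int krp_ret(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	/* $a0 (regs[4]) holds the return value on LoongArch */
	pr_info("kretprobe: returned 0x%lx\n", regs->regs[4]);
	return 0;
}

static struct kretprobe krp = {
	.kp.symbol_name	= "kernel_clone",	/* illustrative target */
	.handler	= krp_ret,
	.maxactive	= 16,
};

static int __init krp_demo_init(void)
{
	return register_kretprobe(&krp);
}

static void __exit krp_demo_exit(void)
{
	unregister_kretprobe(&krp);
}

module_init(krp_demo_init);
module_exit(krp_demo_exit);
MODULE_LICENSE("GPL");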
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index edfd220a3737aadad190d43a2ecaf91fdcaf44e5..fa2443c7afb23688ff4b5ca0ba8363720c981282 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -18,6 +18,7 @@
 #include <linux/sched/debug.h>
 #include <linux/sched/task.h>
 #include <linux/sched/task_stack.h>
+#include <linux/hw_breakpoint.h>
 #include <linux/mm.h>
 #include <linux/stddef.h>
 #include <linux/unistd.h>
@@ -96,6 +97,11 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
 	regs->regs[3] = sp;
 }
 
+void flush_thread(void)
+{
+	flush_ptrace_hw_breakpoint(current);
+}
+
 void exit_thread(struct task_struct *tsk)
 {
 }
@@ -181,6 +187,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 		childregs->regs[2] = tls;
 
 out:
+	ptrace_hw_copy_thread(p);
 	clear_tsk_thread_flag(p, TIF_USEDFPU);
 	clear_tsk_thread_flag(p, TIF_USEDSIMD);
 	clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE);
diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c
index dc2b82ea894cd26c1c3d662e488145d82d1fba0d..06bceae7d1040c6cfb38fe07acea7f1f765eb1a0 100644
--- a/arch/loongarch/kernel/ptrace.c
+++ b/arch/loongarch/kernel/ptrace.c
@@ -20,7 +20,9 @@
 #include <linux/context_tracking.h>
 #include <linux/elf.h>
 #include <linux/errno.h>
+#include <linux/hw_breakpoint.h>
 #include <linux/mm.h>
+#include <linux/nospec.h>
 #include <linux/ptrace.h>
 #include <linux/regset.h>
 #include <linux/sched.h>
@@ -29,6 +31,7 @@
 #include <linux/smp.h>
 #include <linux/stddef.h>
 #include <linux/seccomp.h>
+#include <linux/thread_info.h>
 #include <linux/uaccess.h>
 
 #include <asm/byteorder.h>
@@ -39,6 +42,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
+#include <asm/ptrace.h>
 #include <asm/reg.h>
 #include <asm/syscall.h>
 
@@ -246,6 +250,384 @@ static int cfg_set(struct task_struct *target,
 	return 0;
 }
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+
+/*
+ * Handle hitting a HW-breakpoint.
+ */
+static void ptrace_hbptriggered(struct perf_event *bp,
+				struct perf_sample_data *data,
+				struct pt_regs *regs)
+{
+	int i;
+	struct arch_hw_breakpoint *bkpt = counter_arch_bp(bp);
+
+	for (i = 0; i < LOONGARCH_MAX_BRP; ++i)
+		if (current->thread.hbp_break[i] == bp)
+			break;
+
+	for (i = 0; i < LOONGARCH_MAX_WRP; ++i)
+		if (current->thread.hbp_watch[i] == bp)
+			break;
+
+	force_sig_ptrace_errno_trap(i, (void __user *)bkpt->address);
+}
+
+static struct perf_event *ptrace_hbp_get_event(unsigned int note_type,
+					       struct task_struct *tsk,
+					       unsigned long idx)
+{
+	struct perf_event *bp;
+
+	switch (note_type) {
+	case NT_LOONGARCH_HW_BREAK:
+		if (idx >= LOONGARCH_MAX_BRP)
+			return ERR_PTR(-EINVAL);
+		idx = array_index_nospec(idx, LOONGARCH_MAX_BRP);
+		bp = tsk->thread.hbp_break[idx];
+		break;
+	case NT_LOONGARCH_HW_WATCH:
+		if (idx >= LOONGARCH_MAX_WRP)
+			return ERR_PTR(-EINVAL);
+		idx = array_index_nospec(idx, LOONGARCH_MAX_WRP);
+		bp = tsk->thread.hbp_watch[idx];
+		break;
+	}
+
+	return bp;
+}
+
+static int ptrace_hbp_set_event(unsigned int note_type,
+				struct task_struct *tsk,
+				unsigned long idx,
+				struct perf_event *bp)
+{
+	switch (note_type) {
+	case NT_LOONGARCH_HW_BREAK:
+		if (idx >= LOONGARCH_MAX_BRP)
+			return -EINVAL;
+		idx = array_index_nospec(idx, LOONGARCH_MAX_BRP);
+		tsk->thread.hbp_break[idx] = bp;
+		break;
+	case NT_LOONGARCH_HW_WATCH:
+		if (idx >= LOONGARCH_MAX_WRP)
+			return -EINVAL;
+		idx = array_index_nospec(idx, LOONGARCH_MAX_WRP);
+		tsk->thread.hbp_watch[idx] = bp;
+		break;
+	}
+
+	return 0;
+}
+
+static struct perf_event *ptrace_hbp_create(unsigned int note_type,
+					    struct task_struct *tsk,
+					    unsigned long idx)
+{
+	int err, type;
+	struct perf_event *bp;
+	struct perf_event_attr attr;
+
+	switch (note_type) {
+	case NT_LOONGARCH_HW_BREAK:
+		type = HW_BREAKPOINT_X;
+		break;
+	case NT_LOONGARCH_HW_WATCH:
+		type = HW_BREAKPOINT_RW;
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	ptrace_breakpoint_init(&attr);
+
+	/*
+	 * Initialise fields to sane defaults
+	 * (i.e. values that will pass validation).
+	 */
+	attr.bp_addr	= 0;
+	attr.bp_len	= HW_BREAKPOINT_LEN_4;
+	attr.bp_type	= type;
+	attr.disabled	= 1;
+
+	bp = register_user_hw_breakpoint(&attr, ptrace_hbptriggered, NULL, tsk);
+	if (IS_ERR(bp))
+		return bp;
+
+	err = ptrace_hbp_set_event(note_type, tsk, idx, bp);
+	if (err)
+		return ERR_PTR(err);
+
+	return bp;
+}
+
+static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type,
+				     struct arch_hw_breakpoint_ctrl ctrl,
+				     struct perf_event_attr *attr)
+{
+	int err, len, type, offset;
+
+	err = arch_bp_generic_fields(ctrl, &len, &type, &offset);
+	if (err)
+		return err;
+
+	switch (note_type) {
+	case NT_LOONGARCH_HW_BREAK:
+		if ((type & HW_BREAKPOINT_X) != type)
+			return -EINVAL;
+		break;
+	case NT_LOONGARCH_HW_WATCH:
+		if ((type & HW_BREAKPOINT_RW) != type)
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	attr->bp_len	= len;
+	attr->bp_type	= type;
+	attr->bp_addr	+= offset;
+
+	return 0;
+}
+
+static int ptrace_hbp_get_resource_info(unsigned int note_type, u16 *info)
+{
+	u8 num;
+	u16 reg = 0;
+
+	switch (note_type) {
+	case NT_LOONGARCH_HW_BREAK:
+		num = hw_breakpoint_slots(TYPE_INST);
+		break;
+	case NT_LOONGARCH_HW_WATCH:
+		num = hw_breakpoint_slots(TYPE_DATA);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	*info = reg | num;
+
+	return 0;
+}
+
+static struct perf_event *ptrace_hbp_get_initialised_bp(unsigned int note_type,
+							struct task_struct *tsk,
+							unsigned long idx)
+{
+	struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx);
+
+	if (!bp)
+		bp = ptrace_hbp_create(note_type, tsk, idx);
+
+	return bp;
+}
+
+static int ptrace_hbp_get_ctrl(unsigned int note_type,
+			       struct task_struct *tsk,
+			       unsigned long idx, u32 *ctrl)
+{
+	struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx);
+
+	if (IS_ERR(bp))
+		return PTR_ERR(bp);
+
+	*ctrl = bp ? encode_ctrl_reg(counter_arch_bp(bp)->ctrl) : 0;
+
+	return 0;
+}
+
+static int ptrace_hbp_get_mask(unsigned int note_type,
+			       struct task_struct *tsk,
+			       unsigned long idx, u64 *mask)
+{
+	struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx);
+
+	if (IS_ERR(bp))
+		return PTR_ERR(bp);
+
+	*mask = bp ? counter_arch_bp(bp)->mask : 0;
+
+	return 0;
+}
+
+static int ptrace_hbp_get_addr(unsigned int note_type,
+			       struct task_struct *tsk,
+			       unsigned long idx, u64 *addr)
+{
+	struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx);
+
+	if (IS_ERR(bp))
+		return PTR_ERR(bp);
+
+	*addr = bp ? counter_arch_bp(bp)->address : 0;
+
+	return 0;
+}
+
+static int ptrace_hbp_set_ctrl(unsigned int note_type,
+			       struct task_struct *tsk,
+			       unsigned long idx, u32 uctrl)
+{
+	int err;
+	struct perf_event *bp;
+	struct perf_event_attr attr;
+	struct arch_hw_breakpoint_ctrl ctrl;
+
+	bp = ptrace_hbp_get_initialised_bp(note_type, tsk, idx);
+	if (IS_ERR(bp))
+		return PTR_ERR(bp);
+
+	attr = bp->attr;
+	decode_ctrl_reg(uctrl, &ctrl);
+	err = ptrace_hbp_fill_attr_ctrl(note_type, ctrl, &attr);
+	if (err)
+		return err;
+
+	return modify_user_hw_breakpoint(bp, &attr);
+}
+
+static int ptrace_hbp_set_mask(unsigned int note_type,
+			       struct task_struct *tsk,
+			       unsigned long idx, u64 mask)
+{
+	struct perf_event *bp;
+	struct perf_event_attr attr;
+	struct arch_hw_breakpoint *info;
+
+	bp = ptrace_hbp_get_initialised_bp(note_type, tsk, idx);
+	if (IS_ERR(bp))
+		return PTR_ERR(bp);
+
+	attr = bp->attr;
+	info = counter_arch_bp(bp);
+	info->mask = mask;
+
+	return modify_user_hw_breakpoint(bp, &attr);
+}
+
+static int ptrace_hbp_set_addr(unsigned int note_type,
+			       struct task_struct *tsk,
+			       unsigned long idx, u64 addr)
+{
+	struct perf_event *bp;
+	struct perf_event_attr attr;
+
+	bp = ptrace_hbp_get_initialised_bp(note_type, tsk, idx);
+	if (IS_ERR(bp))
+		return PTR_ERR(bp);
+
+	attr = bp->attr;
+	attr.bp_addr = addr;
+
+	return modify_user_hw_breakpoint(bp, &attr);
+}
+
+#define PTRACE_HBP_CTRL_SZ	sizeof(u32)
+#define PTRACE_HBP_ADDR_SZ	sizeof(u64)
+#define PTRACE_HBP_MASK_SZ	sizeof(u64)
+
+static int hw_break_get(struct task_struct *target,
+			const struct user_regset *regset,
+			struct membuf to)
+{
+	u16 info;
+	u32 ctrl;
+	u64 addr, mask;
+	int ret, idx = 0;
+	unsigned int note_type = regset->core_note_type;
+
+	/* Resource info */
+	ret = ptrace_hbp_get_resource_info(note_type, &info);
+	if (ret)
+		return ret;
+
+	membuf_write(&to, &info, sizeof(info));
+
+	/* (address, ctrl) registers */
+	while (to.left) {
+		ret = ptrace_hbp_get_addr(note_type, target, idx, &addr);
+		if (ret)
+			return ret;
+
+		ret = ptrace_hbp_get_mask(note_type, target, idx, &mask);
+		if (ret)
+			return ret;
+
+		ret = ptrace_hbp_get_ctrl(note_type, target, idx, &ctrl);
+		if (ret)
+			return ret;
+
+		membuf_store(&to, addr);
+		membuf_store(&to, mask);
+		membuf_store(&to, ctrl);
+		idx++;
+	}
+
+	return 0;
+}
+
+static int hw_break_set(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			const void *kbuf, const void __user *ubuf)
+{
+	u32 ctrl;
+	u64 addr, mask;
+	int ret, idx = 0, offset, limit;
+	unsigned int note_type = regset->core_note_type;
+
+	/* Resource info */
+	offset = offsetof(struct user_watch_state, dbg_regs);
+	user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, 0, offset);
+
+	/* (address, ctrl) registers */
+	limit = regset->n * regset->size;
+	while (count && offset < limit) {
+		if (count < PTRACE_HBP_ADDR_SZ)
+			return -EINVAL;
+
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &addr,
+					 offset, offset + PTRACE_HBP_ADDR_SZ);
+		if (ret)
+			return ret;
+
+		ret = ptrace_hbp_set_addr(note_type, target, idx, addr);
+		if (ret)
+			return ret;
+		offset += PTRACE_HBP_ADDR_SZ;
+
+		if (!count)
+			break;
+
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &mask,
+					 offset, offset + PTRACE_HBP_MASK_SZ);
+		if (ret)
+			return ret;
+
+		ret = ptrace_hbp_set_mask(note_type, target, idx, mask);
+		if (ret)
+			return ret;
+		offset += PTRACE_HBP_MASK_SZ;
+
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl,
+					 offset, offset + PTRACE_HBP_CTRL_SZ);
+		if (ret)
+			return ret;
+
+		ret = ptrace_hbp_set_ctrl(note_type, target, idx, ctrl);
+		if (ret)
+			return ret;
+		offset += PTRACE_HBP_CTRL_SZ;
+		idx++;
+	}
+
+	return 0;
+}
+
+#endif
+
 struct pt_regs_offset {
 	const char *name;
 	int offset;
@@ -319,6 +701,10 @@ enum loongarch_regset {
 	REGSET_GPR,
 	REGSET_FPR,
 	REGSET_CPUCFG,
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	REGSET_HW_BREAK,
+	REGSET_HW_WATCH,
+#endif
 };
 
 static const struct user_regset loongarch64_regsets[] = {
@@ -346,6 +732,24 @@ static const struct user_regset loongarch64_regsets[] = {
 		.regset_get	= cfg_get,
 		.set		= cfg_set,
 	},
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	[REGSET_HW_BREAK] = {
+		.core_note_type = NT_LOONGARCH_HW_BREAK,
+		.n = sizeof(struct user_watch_state) / sizeof(u32),
+		.size = sizeof(u32),
+		.align = sizeof(u32),
+		.regset_get = hw_break_get,
+		.set = hw_break_set,
+	},
+	[REGSET_HW_WATCH] = {
+		.core_note_type = NT_LOONGARCH_HW_WATCH,
+		.n = sizeof(struct user_watch_state) / sizeof(u32),
+		.size = sizeof(u32),
+		.align = sizeof(u32),
+		.regset_get = hw_break_get,
+		.set = hw_break_set,
+	},
+#endif
 };
 
 static const struct user_regset_view user_loongarch64_view = {
@@ -431,3 +835,71 @@ long arch_ptrace(struct task_struct *child, long request,
 
 	return ret;
 }
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+static void ptrace_triggered(struct perf_event *bp,
+		      struct perf_sample_data *data, struct pt_regs *regs)
+{
+	struct perf_event_attr attr;
+
+	attr = bp->attr;
+	attr.disabled = true;
+	modify_user_hw_breakpoint(bp, &attr);
+}
+
+static int set_single_step(struct task_struct *tsk, unsigned long addr)
+{
+	struct perf_event *bp;
+	struct perf_event_attr attr;
+	struct arch_hw_breakpoint *info;
+	struct thread_struct *thread = &tsk->thread;
+
+	bp = thread->hbp_break[0];
+	if (!bp) {
+		ptrace_breakpoint_init(&attr);
+
+		attr.bp_addr = addr;
+		attr.bp_len = HW_BREAKPOINT_LEN_8;
+		attr.bp_type = HW_BREAKPOINT_X;
+
+		bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
+						 NULL, tsk);
+		if (IS_ERR(bp))
+			return PTR_ERR(bp);
+
+		thread->hbp_break[0] = bp;
+	} else {
+		int err;
+
+		attr = bp->attr;
+		attr.bp_addr = addr;
+
+		/* Reenable breakpoint */
+		attr.disabled = false;
+		err = modify_user_hw_breakpoint(bp, &attr);
+		if (unlikely(err))
+			return err;
+
+		csr_write64(attr.bp_addr, LOONGARCH_CSR_IB0ADDR);
+	}
+	info = counter_arch_bp(bp);
+	info->mask = TASK_SIZE - 1;
+
+	return 0;
+}
+
+/* ptrace API */
+void user_enable_single_step(struct task_struct *task)
+{
+	struct thread_info *ti = task_thread_info(task);
+
+	set_single_step(task, task_pt_regs(task)->csr_era);
+	task->thread.single_step = task_pt_regs(task)->csr_era;
+	set_ti_thread_flag(ti, TIF_SINGLESTEP);
+}
+
+void user_disable_single_step(struct task_struct *task)
+{
+	clear_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+#endif
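
The new regsets are reachable from userspace through PTRACE_GETREGSET/PTRACE_SETREGSET. As a rough sketch (not part of this patch), the layout assumed below — a 16-bit resource-info word followed by addr/mask/ctrl tuples — mirrors hw_break_get() above; consult the uapi user_watch_state definition for the authoritative layout:

/* hbp_info.c - read the hw-breakpoint resource word from a stopped tracee */
#include <elf.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <unistd.h>

#ifndef NT_LOONGARCH_HW_BREAK
#define NT_LOONGARCH_HW_BREAK 0xa05
#endif

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {		/* tracee: stop and wait for the tracer */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		_exit(0);
	}

	waitpid(pid, NULL, 0);

	/* assumed layout: u16 info, then (addr, mask, ctrl) per slot */
	uint64_t buf[64] = { 0 };
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };

	if (ptrace(PTRACE_GETREGSET, pid,
		   (void *)NT_LOONGARCH_HW_BREAK, &iov) == 0)
		printf("%u instruction breakpoint(s)\n",
		       (unsigned int)(buf[0] & 0xff));
	else
		perror("PTRACE_GETREGSET");

	kill(pid, SIGKILL);
	return 0;
}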
diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c
new file mode 100644
index 0000000000000000000000000000000000000000..01f94d1e3edf6f8b26b31f30f2494a1ace76f0e8
--- /dev/null
+++ b/arch/loongarch/kernel/relocate.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for Kernel relocation at boot time
+ *
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/elf.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/panic_notifier.h>
+#include <linux/start_kernel.h>
+#include <asm/bootinfo.h>
+#include <asm/early_ioremap.h>
+#include <asm/inst.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+
+#define RELOCATED(x) ((void *)((long)x + reloc_offset))
+#define RELOCATED_KASLR(x) ((void *)((long)x + random_offset))
+
+static unsigned long reloc_offset;
+
+static inline void __init relocate_relative(void)
+{
+	Elf64_Rela *rela, *rela_end;
+	rela = (Elf64_Rela *)&__rela_dyn_begin;
+	rela_end = (Elf64_Rela *)&__rela_dyn_end;
+
+	for ( ; rela < rela_end; rela++) {
+		Elf64_Addr addr = rela->r_offset;
+		Elf64_Addr relocated_addr = rela->r_addend;
+
+		if (rela->r_info != R_LARCH_RELATIVE)
+			continue;
+
+		if (relocated_addr >= VMLINUX_LOAD_ADDRESS)
+			relocated_addr = (Elf64_Addr)RELOCATED(relocated_addr);
+
+		*(Elf64_Addr *)RELOCATED(addr) = relocated_addr;
+	}
+}
+
+static inline void __init relocate_absolute(long random_offset)
+{
+	void *begin, *end;
+	struct rela_la_abs *p;
+
+	begin = RELOCATED_KASLR(&__la_abs_begin);
+	end   = RELOCATED_KASLR(&__la_abs_end);
+
+	for (p = begin; (void *)p < end; p++) {
+		long v = p->symvalue;
+		uint32_t lu12iw, ori, lu32id, lu52id;
+		union loongarch_instruction *insn = (void *)p - p->offset;
+
+		lu12iw = (v >> 12) & 0xfffff;
+		ori    = v & 0xfff;
+		lu32id = (v >> 32) & 0xfffff;
+		lu52id = v >> 52;
+
+		insn[0].reg1i20_format.immediate = lu12iw;
+		insn[1].reg2i12_format.immediate = ori;
+		insn[2].reg1i20_format.immediate = lu32id;
+		insn[3].reg2i12_format.immediate = lu52id;
+	}
+}
+
+#ifdef CONFIG_RANDOMIZE_BASE
+static inline __init unsigned long rotate_xor(unsigned long hash,
+					      const void *area, size_t size)
+{
+	size_t i, diff;
+	const typeof(hash) *ptr = PTR_ALIGN(area, sizeof(hash));
+
+	diff = (void *)ptr - area;
+	if (size < diff + sizeof(hash))
+		return hash;
+
+	size = ALIGN_DOWN(size - diff, sizeof(hash));
+
+	for (i = 0; i < size / sizeof(hash); i++) {
+		/* Rotate by odd number of bits and XOR. */
+		hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
+		hash ^= ptr[i];
+	}
+
+	return hash;
+}
+
+static inline __init unsigned long get_random_boot(void)
+{
+	unsigned long hash = 0;
+	unsigned long entropy = random_get_entropy();
+
+	/* Attempt to create a simple but unpredictable starting entropy. */
+	hash = rotate_xor(hash, linux_banner, strlen(linux_banner));
+
+	/* Add in any runtime entropy we can get */
+	hash = rotate_xor(hash, &entropy, sizeof(entropy));
+
+	return hash;
+}
+
+static inline __init bool kaslr_disabled(void)
+{
+	char *str;
+	const char *builtin_cmdline = CONFIG_CMDLINE;
+
+	str = strstr(builtin_cmdline, "nokaslr");
+	if (str == builtin_cmdline || (str > builtin_cmdline && *(str - 1) == ' '))
+		return true;
+
+	str = strstr(boot_command_line, "nokaslr");
+	if (str == boot_command_line || (str > boot_command_line && *(str - 1) == ' '))
+		return true;
+
+	return false;
+}
+
+/* Choose a new address for the kernel */
+static inline void __init *determine_relocation_address(void)
+{
+	unsigned long kernel_length;
+	unsigned long random_offset;
+	void *destination = _text;
+
+	if (kaslr_disabled())
+		return destination;
+
+	kernel_length = (long)_end - (long)_text;
+
+	random_offset = get_random_boot() << 16;
+	random_offset &= (CONFIG_RANDOMIZE_BASE_MAX_OFFSET - 1);
+	if (random_offset < kernel_length)
+		random_offset += ALIGN(kernel_length, 0xffff);
+
+	return RELOCATED_KASLR(destination);
+}
+
+static inline int __init relocation_addr_valid(void *location_new)
+{
+	if ((unsigned long)location_new & 0xffff)
+		return 0; /* Inappropriately aligned new location */
+
+	if ((unsigned long)location_new < (unsigned long)_end)
+		return 0; /* New location overlaps original kernel */
+
+	return 1;
+}
+#endif
+
+static inline void __init update_reloc_offset(unsigned long *addr, long random_offset)
+{
+	unsigned long *new_addr = (unsigned long *)RELOCATED_KASLR(addr);
+
+	*new_addr = (unsigned long)reloc_offset;
+}
+
+void * __init relocate_kernel(void)
+{
+	unsigned long kernel_length;
+	unsigned long random_offset = 0;
+	void *location_new = _text; /* Default to original kernel start */
+	void *kernel_entry = start_kernel; /* Default to original kernel entry point */
+	char *cmdline = early_ioremap(fw_arg1, COMMAND_LINE_SIZE); /* Boot command line is passed in fw_arg1 */
+
+	strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE);
+
+#ifdef CONFIG_RANDOMIZE_BASE
+	location_new = determine_relocation_address();
+
+	/* Sanity check relocation address */
+	if (relocation_addr_valid(location_new))
+		random_offset = (unsigned long)location_new - (unsigned long)(_text);
+#endif
+	reloc_offset = (unsigned long)_text - VMLINUX_LOAD_ADDRESS;
+
+	if (random_offset) {
+		kernel_length = (long)(_end) - (long)(_text);
+
+		/* Copy the kernel to its new location */
+		memcpy(location_new, _text, kernel_length);
+
+		/* Sync the caches, ready for execution of the new kernel */
+		__asm__ __volatile__ (
+			"ibar 0 \t\n"
+			"dbar 0 \t\n"
+			::: "memory");
+
+		reloc_offset += random_offset;
+
+		/* Return the new kernel's entry point */
+		kernel_entry = RELOCATED_KASLR(start_kernel);
+
+		/* The current thread is now within the relocated kernel */
+		__current_thread_info = RELOCATED_KASLR(__current_thread_info);
+
+		update_reloc_offset(&reloc_offset, random_offset);
+	}
+
+	if (reloc_offset)
+		relocate_relative();
+
+	relocate_absolute(random_offset);
+
+	return kernel_entry;
+}
+
+/*
+ * Show relocation information on panic.
+ */
+static void show_kernel_relocation(const char *level)
+{
+	if (reloc_offset > 0) {
+		printk(level);
+		pr_cont("Kernel relocated by 0x%lx\n", reloc_offset);
+		pr_cont(" .text @ 0x%px\n", _text);
+		pr_cont(" .data @ 0x%px\n", _sdata);
+		pr_cont(" .bss  @ 0x%px\n", __bss_start);
+	}
+}
+
+static int kernel_location_notifier_fn(struct notifier_block *self,
+				       unsigned long v, void *p)
+{
+	show_kernel_relocation(KERN_EMERG);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block kernel_location_notifier = {
+	.notifier_call = kernel_location_notifier_fn
+};
+
+static int __init register_kernel_offset_dumper(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &kernel_location_notifier);
+	return 0;
+}
+
+arch_initcall(register_kernel_offset_dumper);
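
get_random_boot() above mixes whatever entropy is cheaply available this early (the banner string and a timer read) with a rotate-and-XOR hash. A host-side sketch of the same mixing, with the alignment handling simplified away (the input is assumed word-aligned here):

/* kaslr_hash.c - host-side sketch of rotate_xor() entropy mixing */
#include <stdio.h>
#include <string.h>

static unsigned long rotate_xor(unsigned long hash, const void *area,
				size_t size)
{
	size_t i;
	const unsigned long *ptr = area;	/* input assumed aligned here */

	for (i = 0; i < size / sizeof(hash); i++) {
		/* rotate by an odd number of bits (7) and XOR */
		hash = (hash << (sizeof(hash) * 8 - 7)) | (hash >> 7);
		hash ^= ptr[i];
	}
	return hash;
}

int main(void)
{
	static const char banner[] = "Linux version 6.x (illustrative banner)";
	unsigned long hash = rotate_xor(0, banner, strlen(banner));

	/* as in relocate.c: shift, then mask to RANDOMIZE_BASE_MAX_OFFSET-1 */
	printf("hash=0x%lx offset=0x%lx\n", hash, (hash << 16) & 0xffffff);
	return 0;
}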
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 4344502c0b31780677a84cd5bb117004336df712..bae84ccf6d3671c29e1849ec1542693d1ef5a06f 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -234,11 +234,14 @@ static void __init arch_reserve_vmcore(void)
 #endif
 }
 
+/* 2MB alignment for crash kernel regions */
+#define CRASH_ALIGN	SZ_2M
+#define CRASH_ADDR_MAX	SZ_4G
+
 static void __init arch_parse_crashkernel(void)
 {
 #ifdef CONFIG_KEXEC
 	int ret;
-	unsigned long long start;
 	unsigned long long total_mem;
 	unsigned long long crash_base, crash_size;
 
@@ -247,8 +250,13 @@ static void __init arch_parse_crashkernel(void)
 	if (ret < 0 || crash_size <= 0)
 		return;
 
-	start = memblock_phys_alloc_range(crash_size, 1, crash_base, crash_base + crash_size);
-	if (start != crash_base) {
+	if (crash_base <= 0) {
+		crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, CRASH_ALIGN, CRASH_ADDR_MAX);
+		if (!crash_base) {
+			pr_warn("crashkernel reservation failed - No suitable area found.\n");
+			return;
+		}
+	} else if (!memblock_phys_alloc_range(crash_size, CRASH_ALIGN, crash_base, crash_base + crash_size)) {
 		pr_warn("Invalid memory region reserved for crash kernel\n");
 		return;
 	}
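
Usage note (values illustrative): with this change a plain "crashkernel=512M" now works — memblock picks a 2 MiB-aligned region below 4 GiB — while "crashkernel=512M@2G" still requests that exact region and triggers the "Invalid memory region" warning above if it cannot be reserved.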
diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
index a6576dea590c0b26644ce3eebd392b17a1477490..4351f69d995015e471c0269e1c6c37c9d4b388e2 100644
--- a/arch/loongarch/kernel/time.c
+++ b/arch/loongarch/kernel/time.c
@@ -140,16 +140,17 @@ static int get_timer_irq(void)
 
 int constant_clockevent_init(void)
 {
-	int irq;
 	unsigned int cpu = smp_processor_id();
 	unsigned long min_delta = 0x600;
 	unsigned long max_delta = (1UL << 48) - 1;
 	struct clock_event_device *cd;
-	static int timer_irq_installed = 0;
+	static int irq = 0, timer_irq_installed = 0;
 
-	irq = get_timer_irq();
-	if (irq < 0)
-		pr_err("Failed to map irq %d (timer)\n", irq);
+	if (!timer_irq_installed) {
+		irq = get_timer_irq();
+		if (irq < 0)
+			pr_err("Failed to map irq %d (timer)\n", irq);
+	}
 
 	cd = &per_cpu(constant_clockevent_device, cpu);
 
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index c38a146a973b45beafc0e984bdff77631fbea0bb..de8ebe20b666cf5934751801e615f6f16ea21f4a 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -371,9 +371,14 @@ int no_unaligned_warning __read_mostly = 1;	/* Only 1 warning by default */
 
 asmlinkage void noinstr do_ale(struct pt_regs *regs)
 {
-	unsigned int *pc;
 	irqentry_state_t state = irqentry_enter(regs);
 
+#ifndef CONFIG_ARCH_STRICT_ALIGN
+	die_if_kernel("Kernel ale access", regs);
+	force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr);
+#else
+	unsigned int *pc;
+
 	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr);
 
 	/*
@@ -397,8 +402,8 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs)
 sigbus:
 	die_if_kernel("Kernel ale access", regs);
 	force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr);
-
 out:
+#endif
 	irqentry_exit(regs, state);
 }
 
@@ -432,7 +437,9 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs)
 	unsigned long era = exception_era(regs);
 	irqentry_state_t state = irqentry_enter(regs);
 
-	local_irq_enable();
+	if (regs->csr_prmd & CSR_PRMD_PIE)
+		local_irq_enable();
+
 	current->thread.trap_nr = read_csr_excode();
 	if (__get_inst(&opcode, (u32 *)era, user))
 		goto out_sigsegv;
@@ -445,14 +452,12 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs)
 	 */
 	switch (bcode) {
 	case BRK_KPROBE_BP:
-		if (notify_die(DIE_BREAK, "Kprobe", regs, bcode,
-			       current->thread.trap_nr, SIGTRAP) == NOTIFY_STOP)
+		if (kprobe_breakpoint_handler(regs))
 			goto out;
 		else
 			break;
 	case BRK_KPROBE_SSTEPBP:
-		if (notify_die(DIE_SSTEPBP, "Kprobe_SingleStep", regs, bcode,
-			       current->thread.trap_nr, SIGTRAP) == NOTIFY_STOP)
+		if (kprobe_singlestep_handler(regs))
 			goto out;
 		else
 			break;
@@ -495,7 +500,9 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs)
 	}
 
 out:
-	local_irq_disable();
+	if (regs->csr_prmd & CSR_PRMD_PIE)
+		local_irq_disable();
+
 	irqentry_exit(regs, state);
 	return;
 
@@ -506,7 +513,52 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs)
 
 asmlinkage void noinstr do_watch(struct pt_regs *regs)
 {
+	irqentry_state_t state = irqentry_enter(regs);
+
+#ifndef CONFIG_HAVE_HW_BREAKPOINT
 	pr_warn("Hardware watch point handler not implemented!\n");
+#else
+	if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) {
+		int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1);
+		unsigned long pc = instruction_pointer(regs);
+		union loongarch_instruction *ip = (union loongarch_instruction *)pc;
+
+		if (llbit) {
+			/*
+			 * When an ll-sc combo is encountered, it is regarded as a single
+			 * instruction, so don't clear llbit and reset CSR.FWPS.Skip until
+			 * the ll-sc execution has completed.
+			 */
+			csr_write32(CSR_FWPC_SKIP, LOONGARCH_CSR_FWPS);
+			csr_write32(CSR_LLBCTL_KLO, LOONGARCH_CSR_LLBCTL);
+			goto out;
+		}
+
+		if (pc == current->thread.single_step) {
+			/*
+			 * Certain insns are occasionally not skipped when CSR.FWPS.Skip is
+			 * set, such as fld.d/fst.d. So single-step needs to compare csr_era
+			 * against the value recorded when single-step was last set up.
+			 */
+			if (!is_self_loop_ins(ip, regs)) {
+				/*
+				 * Check whether the target pc of the instruction equals the
+				 * current pc; if so, we should not set CSR.FWPS.Skip, as that
+				 * would break the original instruction stream.
+				 */
+				csr_write32(CSR_FWPC_SKIP, LOONGARCH_CSR_FWPS);
+				goto out;
+			}
+		}
+	} else {
+		breakpoint_handler(regs);
+		watchpoint_handler(regs);
+	}
+
+	force_sig(SIGTRAP);
+out:
+#endif
+	irqentry_exit(regs, state);
 }
 
 asmlinkage void noinstr do_ri(struct pt_regs *regs)
diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S
index 78506b31ba61babffa2238b5b891652f078ed741..0c7b041be9d8de9ef82e878fa0904c88818044f0 100644
--- a/arch/loongarch/kernel/vmlinux.lds.S
+++ b/arch/loongarch/kernel/vmlinux.lds.S
@@ -65,10 +65,21 @@ SECTIONS
 		__alt_instructions_end = .;
 	}
 
+#ifdef CONFIG_RELOCATABLE
+	. = ALIGN(8);
+	.la_abs : AT(ADDR(.la_abs) - LOAD_OFFSET) {
+		__la_abs_begin = .;
+		*(.la_abs)
+		__la_abs_end = .;
+	}
+#endif
+
 	.got : ALIGN(16) { *(.got) }
 	.plt : ALIGN(16) { *(.plt) }
 	.got.plt : ALIGN(16) { *(.got.plt) }
 
+	.data.rel : { *(.data.rel*) }
+
 	. = ALIGN(PECOFF_SEGMENT_ALIGN);
 	__init_begin = .;
 	__inittext_begin = .;
@@ -92,8 +103,6 @@ SECTIONS
 	PERCPU_SECTION(1 << CONFIG_L1_CACHE_SHIFT)
 #endif
 
-	.rela.dyn : ALIGN(8) { *(.rela.dyn) *(.rela*) }
-
 	.init.bss : {
 		*(.init.bss)
 	}
@@ -106,6 +115,12 @@ SECTIONS
 	RO_DATA(4096)
 	RW_DATA(1 << CONFIG_L1_CACHE_SHIFT, PAGE_SIZE, THREAD_SIZE)
 
+	.rela.dyn : ALIGN(8) {
+		__rela_dyn_begin = .;
+		 *(.rela.dyn) *(.rela*)
+		__rela_dyn_end = .;
+	}
+
 	.sdata : {
 		*(.sdata)
 	}
@@ -132,6 +147,7 @@ SECTIONS
 
 	DISCARDS
 	/DISCARD/ : {
+		*(.dynamic .dynsym .dynstr .hash .gnu.hash)
 		*(.gnu.attributes)
 		*(.options)
 		*(.eh_frame)
diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S
index 7c07d595ee89aca1428431b18727f33332576b06..3b7e1dec71094d4eadb829b65a433a7e6e7fcdca 100644
--- a/arch/loongarch/lib/memcpy.S
+++ b/arch/loongarch/lib/memcpy.S
@@ -17,6 +17,7 @@ SYM_FUNC_START(memcpy)
 	ALTERNATIVE	"b __memcpy_generic", \
 			"b __memcpy_fast", CPU_FEATURE_UAL
 SYM_FUNC_END(memcpy)
+_ASM_NOKPROBE(memcpy)
 
 EXPORT_SYMBOL(memcpy)
 
@@ -41,6 +42,7 @@ SYM_FUNC_START(__memcpy_generic)
 2:	move	a0, a3
 	jr	ra
 SYM_FUNC_END(__memcpy_generic)
+_ASM_NOKPROBE(__memcpy_generic)
 
 /*
  * void *__memcpy_fast(void *dst, const void *src, size_t n)
@@ -93,3 +95,4 @@ SYM_FUNC_START(__memcpy_fast)
 3:	move	a0, a3
 	jr	ra
 SYM_FUNC_END(__memcpy_fast)
+_ASM_NOKPROBE(__memcpy_fast)
diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S
index 6ffdb46da78fdfee56ce06f73fb16e250d61e663..b796c3d6da05258aad1e40f9051dc81c937f4b17 100644
--- a/arch/loongarch/lib/memmove.S
+++ b/arch/loongarch/lib/memmove.S
@@ -29,6 +29,7 @@ SYM_FUNC_START(memmove)
 	b	rmemcpy
 4:	b	__rmemcpy_generic
 SYM_FUNC_END(memmove)
+_ASM_NOKPROBE(memmove)
 
 EXPORT_SYMBOL(memmove)
 
@@ -39,6 +40,7 @@ SYM_FUNC_START(rmemcpy)
 	ALTERNATIVE	"b __rmemcpy_generic", \
 			"b __rmemcpy_fast", CPU_FEATURE_UAL
 SYM_FUNC_END(rmemcpy)
+_ASM_NOKPROBE(rmemcpy)
 
 /*
  * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
@@ -64,6 +66,7 @@ SYM_FUNC_START(__rmemcpy_generic)
 2:	move	a0, a3
 	jr	ra
 SYM_FUNC_END(__rmemcpy_generic)
+_ASM_NOKPROBE(__rmemcpy_generic)
 
 /*
  * void *__rmemcpy_fast(void *dst, const void *src, size_t n)
@@ -119,3 +122,4 @@ SYM_FUNC_START(__rmemcpy_fast)
 3:	move	a0, a3
 	jr	ra
 SYM_FUNC_END(__rmemcpy_fast)
+_ASM_NOKPROBE(__rmemcpy_fast)
diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S
index e7cb4ea3747d7ce045ad51406604fb9035da991f..a9eb732ab2adb9e132592f998717c49be542423e 100644
--- a/arch/loongarch/lib/memset.S
+++ b/arch/loongarch/lib/memset.S
@@ -23,6 +23,7 @@ SYM_FUNC_START(memset)
 	ALTERNATIVE	"b __memset_generic", \
 			"b __memset_fast", CPU_FEATURE_UAL
 SYM_FUNC_END(memset)
+_ASM_NOKPROBE(memset)
 
 EXPORT_SYMBOL(memset)
 
@@ -45,6 +46,7 @@ SYM_FUNC_START(__memset_generic)
 2:	move	a0, a3
 	jr	ra
 SYM_FUNC_END(__memset_generic)
+_ASM_NOKPROBE(__memset_generic)
 
 /*
  * void *__memset_fast(void *s, int c, size_t n)
@@ -89,3 +91,4 @@ SYM_FUNC_START(__memset_fast)
 3:	move	a0, a3
 	jr	ra
 SYM_FUNC_END(__memset_fast)
+_ASM_NOKPROBE(__memset_fast)
diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
index 1ccd53655cab097f02ed09a5c1bd566de38d01ec..449087bd589d339dba1b27f66cc07f170085cff7 100644
--- a/arch/loongarch/mm/fault.c
+++ b/arch/loongarch/mm/fault.c
@@ -135,6 +135,9 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
 	struct vm_area_struct *vma = NULL;
 	vm_fault_t fault;
 
+	if (kprobe_page_fault(regs, current->thread.trap_nr))
+		return;
+
 	/*
 	 * We fault-in kernel-space virtual memory on-demand. The
 	 * 'reference' page table is init_mm.pgd.
diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S
index 58781c6e4191a892d9f17b8bed22dfe10d8e544b..244e2f5aeee564c75c0459569caf4f879876cbe3 100644
--- a/arch/loongarch/mm/tlbex.S
+++ b/arch/loongarch/mm/tlbex.S
@@ -24,8 +24,7 @@
 	move		a0, sp
 	REG_S		a2, sp, PT_BVADDR
 	li.w		a1, \write
-	la.abs		t0, do_page_fault
-	jirl		ra, t0, 0
+	bl		do_page_fault
 	RESTORE_ALL_AND_RET
 	SYM_FUNC_END(tlb_do_page_fault_\write)
 	.endm
@@ -40,7 +39,7 @@ SYM_FUNC_START(handle_tlb_protect)
 	move		a1, zero
 	csrrd		a2, LOONGARCH_CSR_BADV
 	REG_S		a2, sp, PT_BVADDR
-	la.abs		t0, do_page_fault
+	la_abs		t0, do_page_fault
 	jirl		ra, t0, 0
 	RESTORE_ALL_AND_RET
 SYM_FUNC_END(handle_tlb_protect)
@@ -116,7 +115,7 @@ smp_pgtable_change_load:
 
 #ifdef CONFIG_64BIT
 vmalloc_load:
-	la.abs		t1, swapper_pg_dir
+	la_abs		t1, swapper_pg_dir
 	b		vmalloc_done_load
 #endif
 
@@ -187,7 +186,7 @@ tlb_huge_update_load:
 nopage_tlb_load:
 	dbar		0
 	csrrd		ra, EXCEPTION_KS2
-	la.abs		t0, tlb_do_page_fault_0
+	la_abs		t0, tlb_do_page_fault_0
 	jr		t0
 SYM_FUNC_END(handle_tlb_load)
 
@@ -263,7 +262,7 @@ smp_pgtable_change_store:
 
 #ifdef CONFIG_64BIT
 vmalloc_store:
-	la.abs		t1, swapper_pg_dir
+	la_abs		t1, swapper_pg_dir
 	b		vmalloc_done_store
 #endif
 
@@ -336,7 +335,7 @@ tlb_huge_update_store:
 nopage_tlb_store:
 	dbar		0
 	csrrd		ra, EXCEPTION_KS2
-	la.abs		t0, tlb_do_page_fault_1
+	la_abs		t0, tlb_do_page_fault_1
 	jr		t0
 SYM_FUNC_END(handle_tlb_store)
 
@@ -411,7 +410,7 @@ smp_pgtable_change_modify:
 
 #ifdef CONFIG_64BIT
 vmalloc_modify:
-	la.abs		t1, swapper_pg_dir
+	la_abs		t1, swapper_pg_dir
 	b		vmalloc_done_modify
 #endif
 
@@ -483,7 +482,7 @@ tlb_huge_update_modify:
 nopage_tlb_modify:
 	dbar		0
 	csrrd		ra, EXCEPTION_KS2
-	la.abs		t0, tlb_do_page_fault_1
+	la_abs		t0, tlb_do_page_fault_1
 	jr		t0
 SYM_FUNC_END(handle_tlb_modify)
 
diff --git a/arch/loongarch/power/suspend_asm.S b/arch/loongarch/power/suspend_asm.S
index eb2675642f9f455ff2b7aa635ccc699980e205a9..90da899c06a194989ee4c0044f7cc908131090e7 100644
--- a/arch/loongarch/power/suspend_asm.S
+++ b/arch/loongarch/power/suspend_asm.S
@@ -78,9 +78,8 @@ SYM_INNER_LABEL(loongarch_wakeup_start, SYM_L_GLOBAL)
 	li.d		t0, CSR_DMW1_INIT	# CA, PLV0
 	csrwr		t0, LOONGARCH_CSR_DMWIN1
 
-	la.abs		t0, 0f
-	jr		t0
-0:
+	JUMP_VIRT_ADDR	t0, t1
+
 	la.pcrel	t0, acpi_saved_sp
 	ld.d		sp, t0, 0
 	SETUP_WAKEUP
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 68de6f4c4eee35661ae45775238cfcdc1232c322..ac3da855fb197ef32e41b6f2611d5908939d0171 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -445,6 +445,8 @@ typedef struct elf64_shdr {
 #define NT_LOONGARCH_LSX	0xa02	/* LoongArch Loongson SIMD Extension registers */
 #define NT_LOONGARCH_LASX	0xa03	/* LoongArch Loongson Advanced SIMD Extension registers */
 #define NT_LOONGARCH_LBT	0xa04	/* LoongArch Loongson Binary Translation registers */
+#define NT_LOONGARCH_HW_BREAK	0xa05   /* LoongArch hardware breakpoint registers */
+#define NT_LOONGARCH_HW_WATCH	0xa06   /* LoongArch hardware watchpoint registers */
 
 /* Note types with note name "GNU" */
 #define NT_GNU_PROPERTY_TYPE_0	5
diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c
index fd346f58ddbaf10889d3de29fd7399cd90a94258..ef44c614c6d900a5c89f9bf51c0684341f640c95 100644
--- a/samples/kprobes/kprobe_example.c
+++ b/samples/kprobes/kprobe_example.c
@@ -55,6 +55,10 @@ static int __kprobes handler_pre(struct kprobe *p, struct pt_regs *regs)
 	pr_info("<%s> p->addr, 0x%p, ip = 0x%lx, flags = 0x%lx\n",
 		p->symbol_name, p->addr, regs->psw.addr, regs->flags);
 #endif
+#ifdef CONFIG_LOONGARCH
+	pr_info("<%s> p->addr = 0x%p, era = 0x%lx, estat = 0x%lx\n",
+		p->symbol_name, p->addr, regs->csr_era, regs->csr_estat);
+#endif
 
 	/* A dump_stack() here will give a stack backtrace */
 	return 0;
@@ -92,6 +96,10 @@ static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs,
 	pr_info("<%s> p->addr, 0x%p, flags = 0x%lx\n",
 		p->symbol_name, p->addr, regs->flags);
 #endif
+#ifdef CONFIG_LOONGARCH
+	pr_info("<%s> p->addr = 0x%p, estat = 0x%lx\n",
+		p->symbol_name, p->addr, regs->csr_estat);
+#endif
 }
 
 static int __init kprobe_init(void)
diff --git a/tools/arch/loongarch/include/uapi/asm/bitsperlong.h b/tools/arch/loongarch/include/uapi/asm/bitsperlong.h
new file mode 100644
index 0000000000000000000000000000000000000000..d4e32b3d484379631682aa2b01247803c053b2b4
--- /dev/null
+++ b/tools/arch/loongarch/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_LOONGARCH_BITSPERLONG_H
+#define __ASM_LOONGARCH_BITSPERLONG_H
+
+#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8)
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_LOONGARCH_BITSPERLONG_H */
diff --git a/tools/scripts/Makefile.arch b/tools/scripts/Makefile.arch
index 0c6c7f45688784b59cd7e85032c8d1d9a3a12925..1c72d07cb9fed7237bb91568826e77f49be9bc06 100644
--- a/tools/scripts/Makefile.arch
+++ b/tools/scripts/Makefile.arch
@@ -5,7 +5,7 @@ HOSTARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \
                                   -e s/s390x/s390/ -e s/parisc64/parisc/ \
                                   -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \
                                   -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \
-                                  -e s/riscv.*/riscv/)
+                                  -e s/riscv.*/riscv/ -e s/loongarch.*/loongarch/)
 
 ifndef ARCH
 ARCH := $(HOSTARCH)
@@ -34,6 +34,15 @@ ifeq ($(ARCH),sh64)
        SRCARCH := sh
 endif
 
+# Additional ARCH settings for loongarch
+ifeq ($(ARCH),loongarch32)
+       SRCARCH := loongarch
+endif
+
+ifeq ($(ARCH),loongarch64)
+       SRCARCH := loongarch
+endif
+
 LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
 ifeq ($(LP64), 1)
   IS_64_BIT := 1
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
index 45974156522217e844d490ffc689375484333eac..a4f8e7c53c1f73550b3a9675ff25494d8ff7b9ca 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -28,6 +28,9 @@ s390*)
 mips*)
   ARG1=%r4
 ;;
+loongarch*)
+  ARG1=%r4
+;;
 *)
   echo "Please implement other architecture here"
   exit_untested
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
index d4662c8cf407f17e46f0d6d9db1dd431290bc761..1df61e13a812b789d11a06cc9a3ddc517a72a359 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
@@ -40,6 +40,10 @@ mips*)
   GOODREG=%r4
   BADREG=%r12
 ;;
+loongarch*)
+  GOODREG=%r4
+  BADREG=%r12
+;;
 *)
   echo "Please implement other architecture here"
   exit_untested
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 61386e499b77daac56b29cce11b7453d40ac6f3e..43ec36b179dc092f404f18a9773bbf39c90fd8ef 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -128,6 +128,8 @@ struct seccomp_data {
 #  define __NR_seccomp 277
 # elif defined(__csky__)
 #  define __NR_seccomp 277
+# elif defined(__loongarch__)
+#  define __NR_seccomp 277
 # elif defined(__hppa__)
 #  define __NR_seccomp 338
 # elif defined(__powerpc__)
@@ -1755,6 +1757,10 @@ TEST_F(TRACE_poke, getpid_runs_normally)
 				    NT_ARM_SYSTEM_CALL, &__v));	\
 	} while (0)
 # define SYSCALL_RET(_regs)	(_regs).regs[0]
+#elif defined(__loongarch__)
+# define ARCH_REGS		struct user_pt_regs
+# define SYSCALL_NUM(_regs)	(_regs).regs[11]
+# define SYSCALL_RET(_regs)	(_regs).regs[4]
 #elif defined(__riscv) && __riscv_xlen == 64
 # define ARCH_REGS		struct user_regs_struct
 # define SYSCALL_NUM(_regs)	(_regs).a7