Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 29 Jan 2016 13:06:30 +0000 (UTC)
From:      Wojciech Macek <wma@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r295038 - in head/sys: arm64/arm64 arm64/include conf
Message-ID:  <201601291306.u0TD6UoB000571@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: wma
Date: Fri Jan 29 13:06:30 2016
New Revision: 295038
URL: https://svnweb.freebsd.org/changeset/base/295038

Log:
  Framework for ARM64 instruction disassembler
  
      Provide an easy to use framework for ARM64 DDB disassembler.
      This commit does not contain full list of instruction opcodes.
  
  Obtained from:         Semihalf
  Sponsored by:          Cavium
  Approved by:           cognet (mentor)
  Reviewed by:           zbb, andrew, cognet
  Differential revision: https://reviews.freebsd.org/D5114

Added:
  head/sys/arm64/arm64/disassem.c   (contents, props changed)
  head/sys/arm64/include/disassem.h   (contents, props changed)
Modified:
  head/sys/arm64/arm64/db_disasm.c
  head/sys/arm64/include/armreg.h
  head/sys/conf/files.arm64

Modified: head/sys/arm64/arm64/db_disasm.c
==============================================================================
--- head/sys/arm64/arm64/db_disasm.c	Fri Jan 29 11:00:33 2016	(r295037)
+++ head/sys/arm64/arm64/db_disasm.c	Fri Jan 29 13:06:30 2016	(r295038)
@@ -31,11 +31,40 @@
 __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <ddb/ddb.h>
+#include <ddb/db_access.h>
+#include <ddb/db_sym.h>
+
+#include <machine/disassem.h>
+
+static u_int db_disasm_read_word(vm_offset_t);
+static void db_disasm_printaddr(vm_offset_t);
+
+/* Glue code to interface db_disasm to the generic ARM disassembler */
+static const struct disasm_interface db_disasm_interface = {
+	db_disasm_read_word,
+	db_disasm_printaddr,
+	db_printf
+};
+
+static u_int
+db_disasm_read_word(vm_offset_t address)
+{
+
+	return (db_get_value(address, INSN_SIZE, 0));
+}
+
+static void
+db_disasm_printaddr(vm_offset_t address)
+{
+
+	db_printsym((db_addr_t)address, DB_STGY_ANY);
+}
 
 vm_offset_t
 db_disasm(vm_offset_t loc, bool altfmt)
 {
-	return 0;
+
+	return (disasm(&db_disasm_interface, loc, altfmt));
 }
 
 /* End of db_disasm.c */

Added: head/sys/arm64/arm64/disassem.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/arm64/arm64/disassem.c	Fri Jan 29 13:06:30 2016	(r295038)
@@ -0,0 +1,330 @@
+/*-
+ * Copyright (c) 2016 Cavium
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+#include <sys/param.h>
+
+#include <sys/systm.h>
+#include <machine/disassem.h>
+#include <machine/armreg.h>
+#include <ddb/ddb.h>
+
+#define	ARM64_MAX_TOKEN_LEN	8
+#define	ARM64_MAX_TOKEN_CNT	10
+
+static const char *w_reg[] = {
+	"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
+	"w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
+	"w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
+	"w24", "w25", "w26", "w27", "w28", "w29", "w30", "wSP",
+};
+
+static const char *x_reg[] = {
+	"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+	"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+	"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+	"x24", "x25", "x26", "x27", "x28", "x29", "LR", "SP",
+};
+
+static const char *shift_2[] = {
+	"LSL", "LSR", "ASR", "RSV"
+};
+
+/*
+ * Structure representing single token (operand) inside instruction.
+ * name   - name of operand
+ * pos    - position within the instruction (in bits)
+ * len    - operand length (in bits)
+ */
+struct arm64_insn_token {
+	char name[ARM64_MAX_TOKEN_LEN];
+	int pos;
+	int len;
+};
+
+/*
+ * Define generic types for instruction printing.
+ */
+enum arm64_format_type {
+	TYPE_01,	/* OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
+			   OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 */
+};
+
+/*
+ * Structure representing single parsed instruction format.
+ * name   - opcode name
+ * format - opcode format in a human-readable way
+ * type   - syntax type for printing
+ * special_ops  - special options passed to a printer (if any)
+ * mask   - bitmask for instruction matching
+ * pattern      - pattern to look for
+ * tokens - array of tokens (operands) inside instruction
+ */
+struct arm64_insn {
+	char* name;
+	char* format;
+	enum arm64_format_type type;
+	uint64_t special_ops;
+	uint32_t mask;
+	uint32_t pattern;
+	struct arm64_insn_token tokens[ARM64_MAX_TOKEN_CNT];
+};
+
+/*
+ * Specify instruction opcode format in a human-readable way. Use notation
+ * obtained from ARM Architecture Reference Manual for ARMv8-A.
+ *
+ * Format string description:
+ *  Each group must be separated by "|". Group made of 0/1 is used to
+ *  generate mask and pattern for instruction matching. Groups containing
+ *  an operand token (in format NAME(length_bits)) are used to retrieve any
+ *  operand data from the instruction. Names here must be meaningful
+ *  and match the one described in the Manual.
+ *
+ * Token description:
+ * SF     - "0" represents 32-bit access, "1" represents 64-bit access
+ * SHIFT  - type of shift (instruction dependent)
+ * IMM    - immediate value
+ * Rx     - register number
+ */
+static struct arm64_insn arm64_i[] = {
+    { "add", "SF(1)|0001011|SHIFT(2)|0|RM(5)|IMM(6)|RN(5)|RD(5)", TYPE_01, 0 },
+    { "mov", "SF(1)|001000100000000000000|RN(5)|RD(5)", TYPE_01, 0 },
+    { "add", "SF(1)|0010001|SHIFT(2)|IMM(12)|RN(5)|RD(5)", TYPE_01, 0 },
+    { NULL, NULL }
+};
+
+static void
+arm64_disasm_generate_masks(struct arm64_insn *tab)
+{
+	uint32_t mask, val;
+	int a, i;
+	int len, ret;
+	int token = 0;
+	char *format;
+	int error;
+
+	while (tab->name != NULL) {
+		mask = 0;
+		val = 0;
+		format = tab->format;
+		token = 0;
+		error = 0;
+
+		/*
+		 * For each entry analyze format strings from the
+		 * left (i.e. from the MSB).
+		 */
+		a = (INSN_SIZE * NBBY) - 1;
+		while (*format != '\0' && (a >= 0)) {
+			switch(*format) {
+			case '0':
+				/* Bit is 0, add to mask and pattern */
+				mask |= (1 << a);
+				a--;
+				format++;
+				break;
+			case '1':
+				/* Bit is 1, add to mask and pattern */
+				mask |= (1 << a);
+				val |= (1 << a);
+				a--;
+				format++;
+				break;
+			case '|':
+				/* skip */
+				format++;
+				break;
+			default:
+				/* Token found, copy the name */
+				memset(tab->tokens[token].name, 0,
+				    sizeof(tab->tokens[token].name));
+				i = 0;
+				while (*format != '(') {
+					tab->tokens[token].name[i] = *format;
+					i++;
+					format++;
+					if (i >= ARM64_MAX_TOKEN_LEN) {
+						printf("ERROR: token too long in op %s\n",
+						    tab->name);
+						error = 1;
+						break;
+					}
+				}
+				if (error != 0)
+					break;
+
+				/* Read the length value */
+				ret = sscanf(format, "(%d)", &len);
+				if (ret == 1) {
+					if (token >= ARM64_MAX_TOKEN_CNT) {
+						printf("ERROR: to many tokens in op %s\n",
+						    tab->name);
+						error = 1;
+						break;
+					}
+
+					a -= len;
+					tab->tokens[token].pos = a + 1;
+					tab->tokens[token].len = len;
+					token++;
+				}
+
+				/* Skip to the end of the token */
+				while (*format != 0 && *format != '|')
+					format++;
+			}
+		}
+
+		/* Write mask and pattern to the instruction array */
+		tab->mask = mask;
+		tab->pattern = val;
+
+		/*
+		 * If we got here, format string must be parsed and "a"
+		 * should point to -1. If it's not, wrong number of bits
+		 * in format string. Mark this as invalid and prevent
+		 * from being matched.
+		 */
+		if (*format != 0 || (a != -1) || (error != 0)) {
+			tab->mask = 0;
+			tab->pattern = 0xffffffff;
+			printf("ERROR: skipping instruction op %s\n",
+			    tab->name);
+		}
+
+		tab++;
+	}
+}
+
+static int
+arm64_disasm_read_token(struct arm64_insn *insn, u_int opcode,
+    const char *token, int *val)
+{
+	int i;
+
+	for (i = 0; i < ARM64_MAX_TOKEN_CNT; i++) {
+		if (strcmp(insn->tokens[i].name, token) == 0) {
+			*val = (opcode >> insn->tokens[i].pos &
+			    ((1 << insn->tokens[i].len) - 1));
+			return (0);
+		}
+	}
+
+	return (EINVAL);
+}
+
+static const char *
+arm64_reg(int b64, int num)
+{
+
+	if (b64 != 0)
+		return (x_reg[num]);
+
+	return (w_reg[num]);
+}
+
+vm_offset_t
+disasm(const struct disasm_interface *di, vm_offset_t loc, int altfmt)
+{
+	struct arm64_insn *i_ptr = arm64_i;
+	uint32_t insn;
+	int matchp;
+	int ret;
+	int shift, rm, rd, rn, imm, sf;
+	int rm_absent;
+
+	/* Initialize defaults, all are 0 except SF indicating 64bit access */
+	shift = rd = rm = rn = imm = 0;
+	sf = 1;
+
+	matchp = 0;
+	insn = di->di_readword(loc);
+	while (i_ptr->name) {
+		/* If mask is 0 then the parser was not initialized yet */
+		if ((i_ptr->mask != 0) &&
+		    ((insn & i_ptr->mask) ==  i_ptr->pattern)) {
+			matchp = 1;
+			break;
+		}
+		i_ptr++;
+	}
+	if (matchp == 0)
+		goto undefined;
+
+	switch (i_ptr->type) {
+	case TYPE_01:
+		/* OP <RD>, <RN>, <RM>{, <shift [LSL, LSR, ASR]> #<imm>} SF32/64
+		   OP <RD>, <RN>, #<imm>{, <shift [0, 12]>} SF32/64 */
+
+		/* Mandatory tokens */
+		ret = arm64_disasm_read_token(i_ptr, insn, "SF", &sf);
+		ret |= arm64_disasm_read_token(i_ptr, insn, "RD", &rd);
+		ret |= arm64_disasm_read_token(i_ptr, insn, "RN", &rn);
+		if (ret != 0) {
+			printf("ERROR: Missing mandatory token for op %s type %d\n",
+			    i_ptr->name, i_ptr->type);
+			goto undefined;
+		}
+
+		/* Optional tokens */
+		arm64_disasm_read_token(i_ptr, insn, "IMM", &imm);
+		arm64_disasm_read_token(i_ptr, insn, "SHIFT", &shift);
+		rm_absent = arm64_disasm_read_token(i_ptr, insn, "RM", &rm);
+
+		di->di_printf("%s\t%s, %s", i_ptr->name, arm64_reg(sf, rd),
+		    arm64_reg(sf, rn));
+
+		/* If RM is present use it, otherwise use immediate notation */
+		if (rm_absent == 0) {
+			di->di_printf(", %s", arm64_reg(sf, rm));
+			if (imm != 0)
+				di->di_printf(", %s #%d", shift_2[shift], imm);
+		} else {
+			if (imm != 0 || shift != 0)
+				di->di_printf(", #0x%x", imm);
+			if (shift != 0)
+				di->di_printf(" LSL #12");
+		}
+		break;
+	default:
+		goto undefined;
+	}
+
+	di->di_printf("\n");
+	return(loc + INSN_SIZE);
+
+undefined:
+	di->di_printf("undefined\t%08x\n", insn);
+	return(loc + INSN_SIZE);
+}
+
+/* Parse format strings at the very beginning */
+SYSINIT(arm64_disasm_generate_masks, SI_SUB_DDB_SERVICES,
+    SI_ORDER_FIRST, arm64_disasm_generate_masks, arm64_i);

Modified: head/sys/arm64/include/armreg.h
==============================================================================
--- head/sys/arm64/include/armreg.h	Fri Jan 29 11:00:33 2016	(r295037)
+++ head/sys/arm64/include/armreg.h	Fri Jan 29 13:06:30 2016	(r295038)
@@ -33,6 +33,8 @@
 #ifndef _MACHINE_ARMREG_H_
 #define	_MACHINE_ARMREG_H_
 
+#define	INSN_SIZE		4
+
 #define	READ_SPECIALREG(reg)						\
 ({	uint64_t val;							\
 	__asm __volatile("mrs	%0, " __STRING(reg) : "=&r" (val));	\

Added: head/sys/arm64/include/disassem.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/arm64/include/disassem.h	Fri Jan 29 13:06:30 2016	(r295038)
@@ -0,0 +1,42 @@
+/*-
+ * Copyright (c) 2016 Cavium
+ * All rights reserved.
+ *
+ * This software was developed by Semihalf.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __DISASSEM_H_
+#define	__DISASSEM_H_
+
+struct disasm_interface {
+	u_int	(*di_readword)(vm_offset_t);
+	void	(*di_printaddr)(vm_offset_t);
+	int	(*di_printf)(const char *, ...) __printflike(1, 2);
+};
+
+vm_offset_t disasm(const struct disasm_interface *, vm_offset_t, int);
+
+#endif /* __DISASSEM_H_ */

Modified: head/sys/conf/files.arm64
==============================================================================
--- head/sys/conf/files.arm64	Fri Jan 29 11:00:33 2016	(r295037)
+++ head/sys/conf/files.arm64	Fri Jan 29 13:06:30 2016	(r295038)
@@ -20,6 +20,7 @@ arm64/arm64/db_disasm.c		optional	ddb
 arm64/arm64/db_interface.c	optional	ddb
 arm64/arm64/db_trace.c		optional	ddb
 arm64/arm64/debug_monitor.c	optional	kdb
+arm64/arm64/disassem.c		optional	ddb
 arm64/arm64/dump_machdep.c	standard
 arm64/arm64/elf_machdep.c	standard
 arm64/arm64/exception.S		standard



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201601291306.u0TD6UoB000571>