Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 29 Jan 2018 09:24:28 +0000 (UTC)
From:      Wojciech Macek <wma@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r328536 - in head/stand: common powerpc/kboot
Message-ID:  <201801290924.w0T9OSix008403@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: wma
Date: Mon Jan 29 09:24:28 2018
New Revision: 328536
URL: https://svnweb.freebsd.org/changeset/base/328536

Log:
  loader: support for mixed-endianness ELF/loader and POWER8
  
  On POWER8 with current petitboot, the loader.kboot might be
  run as a little-endian application. The FreeBSD kernel is
  always big-endian, so the load_elf_* routines must be aware
  of proper endianness of all fields.
  
  Submitted by:          Wojciech Macek <wma@semihalf.com>
  Obtained from:         Semihalf
  Sponsored by:          IBM, QCM Technologies
  Differential revision: https://reviews.freebsd.org/D12422

Modified:
  head/stand/common/bootstrap.h
  head/stand/common/load_elf.c
  head/stand/powerpc/kboot/conf.c
  head/stand/powerpc/kboot/host_syscall.S
  head/stand/powerpc/kboot/host_syscall.h
  head/stand/powerpc/kboot/hostdisk.c
  head/stand/powerpc/kboot/kerneltramp.S
  head/stand/powerpc/kboot/main.c
  head/stand/powerpc/kboot/metadata.c
  head/stand/powerpc/kboot/ppc64_elf_freebsd.c

Modified: head/stand/common/bootstrap.h
==============================================================================
--- head/stand/common/bootstrap.h	Mon Jan 29 09:21:08 2018	(r328535)
+++ head/stand/common/bootstrap.h	Mon Jan 29 09:24:28 2018	(r328536)
@@ -315,6 +315,9 @@ struct arch_switch
 
     /* Probe ZFS pool(s), if needed. */
     void	(*arch_zfs_probe)(void);
+
+    /* For kexec-type loaders, get ksegment structure */
+    void	(*arch_kexec_kseg_get)(int *nseg, void **kseg);
 };
 extern struct arch_switch archsw;
 

Modified: head/stand/common/load_elf.c
==============================================================================
--- head/stand/common/load_elf.c	Mon Jan 29 09:21:08 2018	(r328535)
+++ head/stand/common/load_elf.c	Mon Jan 29 09:24:28 2018	(r328536)
@@ -29,6 +29,7 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
+#include <sys/endian.h>
 #include <sys/exec.h>
 #include <sys/linker.h>
 #include <sys/module.h>
@@ -118,15 +119,72 @@ __elfN(load_elf_header)(char *filename, elf_file_t ef)
 		err = EFTYPE;
 		goto error;
 	}
+
 	if (ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || /* Layout ? */
 	    ehdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
-	    ehdr->e_ident[EI_VERSION] != EV_CURRENT || /* Version ? */
-	    ehdr->e_version != EV_CURRENT ||
-	    ehdr->e_machine != ELF_TARG_MACH) { /* Machine ? */
+	    ehdr->e_ident[EI_VERSION] != EV_CURRENT) /* Version ? */ {
 		err = EFTYPE;
 		goto error;
 	}
 
+	/*
+	 * Fixup ELF endianness.
+	 *
+	 * The Xhdr structure was loaded using a block read call to
+	 * optimize file accesses. It might happen that the endianness
+	 * of the system memory is different than the endianness of
+	 * the ELF header.
+	 * Swap fields here to guarantee that Xhdr always contains
+	 * valid data regardless of architecture.
+	 */
+	if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) {
+		ehdr->e_type = be16toh(ehdr->e_type);
+		ehdr->e_machine = be16toh(ehdr->e_machine);
+		ehdr->e_version = be32toh(ehdr->e_version);
+		if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) {
+			ehdr->e_entry = be64toh(ehdr->e_entry);
+			ehdr->e_phoff = be64toh(ehdr->e_phoff);
+			ehdr->e_shoff = be64toh(ehdr->e_shoff);
+		} else {
+			ehdr->e_entry = be32toh(ehdr->e_entry);
+			ehdr->e_phoff = be32toh(ehdr->e_phoff);
+			ehdr->e_shoff = be32toh(ehdr->e_shoff);
+		}
+		ehdr->e_flags = be32toh(ehdr->e_flags);
+		ehdr->e_ehsize = be16toh(ehdr->e_ehsize);
+		ehdr->e_phentsize = be16toh(ehdr->e_phentsize);
+		ehdr->e_phnum = be16toh(ehdr->e_phnum);
+		ehdr->e_shentsize = be16toh(ehdr->e_shentsize);
+		ehdr->e_shnum = be16toh(ehdr->e_shnum);
+		ehdr->e_shstrndx = be16toh(ehdr->e_shstrndx);
+
+	} else {
+		ehdr->e_type = le16toh(ehdr->e_type);
+		ehdr->e_machine = le16toh(ehdr->e_machine);
+		ehdr->e_version = le32toh(ehdr->e_version);
+		if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) {
+			ehdr->e_entry = le64toh(ehdr->e_entry);
+			ehdr->e_phoff = le64toh(ehdr->e_phoff);
+			ehdr->e_shoff = le64toh(ehdr->e_shoff);
+		} else {
+			ehdr->e_entry = le32toh(ehdr->e_entry);
+			ehdr->e_phoff = le32toh(ehdr->e_phoff);
+			ehdr->e_shoff = le32toh(ehdr->e_shoff);
+		}
+		ehdr->e_flags = le32toh(ehdr->e_flags);
+		ehdr->e_ehsize = le16toh(ehdr->e_ehsize);
+		ehdr->e_phentsize = le16toh(ehdr->e_phentsize);
+		ehdr->e_phnum = le16toh(ehdr->e_phnum);
+		ehdr->e_shentsize = le16toh(ehdr->e_shentsize);
+		ehdr->e_shnum = le16toh(ehdr->e_shnum);
+		ehdr->e_shstrndx = le16toh(ehdr->e_shstrndx);
+	}
+
+	if (ehdr->e_version != EV_CURRENT || ehdr->e_machine != ELF_TARG_MACH) { /* Machine ? */
+		err = EFTYPE;
+		goto error;
+	}
+
 	return (0);
 
 error:
@@ -317,6 +375,15 @@ __elfN(loadimage)(struct preloaded_file *fp, elf_file_
     u_int	fpcopy;
     Elf_Sym	sym;
     Elf_Addr	p_start, p_end;
+#if __ELF_WORD_SIZE == 64
+    uint64_t scr_ssym;
+    uint64_t scr_esym;
+    uint64_t scr;
+#else
+    uint32_t scr_ssym;
+    uint32_t scr_esym;
+    uint32_t scr;
+#endif
 
     dp = NULL;
     shdr = NULL;
@@ -391,6 +458,54 @@ __elfN(loadimage)(struct preloaded_file *fp, elf_file_
     phdr = (Elf_Phdr *)(ef->firstpage + ehdr->e_phoff);
 
     for (i = 0; i < ehdr->e_phnum; i++) {
+	/*
+	 * Fixup ELF endianness.
+	 *
+	 * The Xhdr structure was loaded using a block read call to
+	 * optimize file accesses. It might happen that the endianness
+	 * of the system memory is different than the endianness of
+	 * the ELF header.
+	 * Swap fields here to guarantee that Xhdr always contains
+	 * valid data regardless of architecture.
+	 */
+	if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) {
+		phdr[i].p_type = be32toh(phdr[i].p_type);
+		phdr[i].p_flags = be32toh(phdr[i].p_flags);
+		if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) {
+			phdr[i].p_offset = be64toh(phdr[i].p_offset);
+			phdr[i].p_vaddr = be64toh(phdr[i].p_vaddr);
+			phdr[i].p_paddr = be64toh(phdr[i].p_paddr);
+			phdr[i].p_filesz = be64toh(phdr[i].p_filesz);
+			phdr[i].p_memsz = be64toh(phdr[i].p_memsz);
+			phdr[i].p_align = be64toh(phdr[i].p_align);
+		} else {
+			phdr[i].p_offset = be32toh(phdr[i].p_offset);
+			phdr[i].p_vaddr = be32toh(phdr[i].p_vaddr);
+			phdr[i].p_paddr = be32toh(phdr[i].p_paddr);
+			phdr[i].p_filesz = be32toh(phdr[i].p_filesz);
+			phdr[i].p_memsz = be32toh(phdr[i].p_memsz);
+			phdr[i].p_align = be32toh(phdr[i].p_align);
+		}
+	} else {
+		phdr[i].p_type = le32toh(phdr[i].p_type);
+		phdr[i].p_flags = le32toh(phdr[i].p_flags);
+		if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) {
+			phdr[i].p_offset = le64toh(phdr[i].p_offset);
+			phdr[i].p_vaddr = le64toh(phdr[i].p_vaddr);
+			phdr[i].p_paddr = le64toh(phdr[i].p_paddr);
+			phdr[i].p_filesz = le64toh(phdr[i].p_filesz);
+			phdr[i].p_memsz = le64toh(phdr[i].p_memsz);
+			phdr[i].p_align = le64toh(phdr[i].p_align);
+		} else {
+			phdr[i].p_offset = le32toh(phdr[i].p_offset);
+			phdr[i].p_vaddr = le32toh(phdr[i].p_vaddr);
+			phdr[i].p_paddr = le32toh(phdr[i].p_paddr);
+			phdr[i].p_filesz = le32toh(phdr[i].p_filesz);
+			phdr[i].p_memsz = le32toh(phdr[i].p_memsz);
+			phdr[i].p_align = le32toh(phdr[i].p_align);
+		}
+	}
+
 	/* We want to load PT_LOAD segments only.. */
 	if (phdr[i].p_type != PT_LOAD)
 	    continue;
@@ -465,6 +580,60 @@ __elfN(loadimage)(struct preloaded_file *fp, elf_file_
 	    "_loadimage: failed to read section headers");
 	goto nosyms;
     }
+
+    /*
+     * Fixup ELF endianness.
+     *
+     * The Xhdr structure was loaded using a block read call to
+     * optimize file accesses. It might happen that the endianness
+     * of the system memory is different than the endianness of
+     * the ELF header.
+     * Swap fields here to guarantee that Xhdr always contains
+     * valid data regardless of architecture.
+     */
+    for (i = 0; i < ehdr->e_shnum; i++) {
+	if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) {
+		shdr[i].sh_name = be32toh(shdr[i].sh_name);
+		shdr[i].sh_type = be32toh(shdr[i].sh_type);
+		shdr[i].sh_link = be32toh(shdr[i].sh_link);
+		shdr[i].sh_info = be32toh(shdr[i].sh_info);
+		if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) {
+			shdr[i].sh_flags = be64toh(shdr[i].sh_flags);
+			shdr[i].sh_addr = be64toh(shdr[i].sh_addr);
+			shdr[i].sh_offset = be64toh(shdr[i].sh_offset);
+			shdr[i].sh_size = be64toh(shdr[i].sh_size);
+			shdr[i].sh_addralign = be64toh(shdr[i].sh_addralign);
+			shdr[i].sh_entsize = be64toh(shdr[i].sh_entsize);
+		} else {
+			shdr[i].sh_flags = be32toh(shdr[i].sh_flags);
+			shdr[i].sh_addr = be32toh(shdr[i].sh_addr);
+			shdr[i].sh_offset = be32toh(shdr[i].sh_offset);
+			shdr[i].sh_size = be32toh(shdr[i].sh_size);
+			shdr[i].sh_addralign = be32toh(shdr[i].sh_addralign);
+			shdr[i].sh_entsize = be32toh(shdr[i].sh_entsize);
+		}
+	} else {
+		shdr[i].sh_name = le32toh(shdr[i].sh_name);
+		shdr[i].sh_type = le32toh(shdr[i].sh_type);
+		shdr[i].sh_link = le32toh(shdr[i].sh_link);
+		shdr[i].sh_info = le32toh(shdr[i].sh_info);
+		if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) {
+			shdr[i].sh_flags = le64toh(shdr[i].sh_flags);
+			shdr[i].sh_addr = le64toh(shdr[i].sh_addr);
+			shdr[i].sh_offset = le64toh(shdr[i].sh_offset);
+			shdr[i].sh_size = le64toh(shdr[i].sh_size);
+			shdr[i].sh_addralign = le64toh(shdr[i].sh_addralign);
+			shdr[i].sh_entsize = le64toh(shdr[i].sh_entsize);
+		} else {
+			shdr[i].sh_flags = le32toh(shdr[i].sh_flags);
+			shdr[i].sh_addr = le32toh(shdr[i].sh_addr);
+			shdr[i].sh_offset = le32toh(shdr[i].sh_offset);
+			shdr[i].sh_size = le32toh(shdr[i].sh_size);
+			shdr[i].sh_addralign = le32toh(shdr[i].sh_addralign);
+			shdr[i].sh_entsize = le32toh(shdr[i].sh_entsize);
+		}
+	}
+    }
     file_addmetadata(fp, MODINFOMD_SHDR, chunk, shdr);
 
     /*
@@ -540,10 +709,16 @@ __elfN(loadimage)(struct preloaded_file *fp, elf_file_
 		break;
 	}
 #endif
-
 	size = shdr[i].sh_size;
-	archsw.arch_copyin(&size, lastaddr, sizeof(size));
-	lastaddr += sizeof(size);
+#if !defined(__powerpc__)
+	scr = size;	/* native byte order; scr must never reach arch_copyin unset */
+#elif __ELF_WORD_SIZE == 64
+	scr = htobe64(size);	/* PowerPC: the kernel always expects BE data */
+#else
+	scr = htobe32(size);	/* PowerPC: the kernel always expects BE data */
+#endif
+	archsw.arch_copyin(&scr, lastaddr, sizeof(scr));
+	lastaddr += sizeof(scr);
 
 #ifdef ELF_VERBOSE
 	printf("\n%s: 0x%jx@0x%jx -> 0x%jx-0x%jx", secname,
@@ -582,8 +757,22 @@ __elfN(loadimage)(struct preloaded_file *fp, elf_file_
     printf("]");
 #endif
 
-    file_addmetadata(fp, MODINFOMD_SSYM, sizeof(ssym), &ssym);
-    file_addmetadata(fp, MODINFOMD_ESYM, sizeof(esym), &esym);
+#if defined(__powerpc__)
+  /* On PowerPC we always need to provide BE data to the kernel */
+  #if __ELF_WORD_SIZE == 64
+    scr_ssym = htobe64((uint64_t)ssym);
+    scr_esym = htobe64((uint64_t)esym);
+  #else
+    scr_ssym = htobe32((uint32_t)ssym);
+    scr_esym = htobe32((uint32_t)esym);
+  #endif
+#else
+    scr_ssym = ssym;
+    scr_esym = esym;
+#endif
+
+    file_addmetadata(fp, MODINFOMD_SSYM, sizeof(scr_ssym), &scr_ssym);
+    file_addmetadata(fp, MODINFOMD_ESYM, sizeof(scr_esym), &scr_esym);
 
 nosyms:
     printf("\n");

Modified: head/stand/powerpc/kboot/conf.c
==============================================================================
--- head/stand/powerpc/kboot/conf.c	Mon Jan 29 09:21:08 2018	(r328535)
+++ head/stand/powerpc/kboot/conf.c	Mon Jan 29 09:24:28 2018	(r328536)
@@ -78,6 +78,7 @@ struct fs_ops *file_system[] = {
 #if defined(LOADER_BZIP2_SUPPORT)
     &bzipfs_fsops,
 #endif
+    &dosfs_fsops,
     NULL
 };
 

Modified: head/stand/powerpc/kboot/host_syscall.S
==============================================================================
--- head/stand/powerpc/kboot/host_syscall.S	Mon Jan 29 09:21:08 2018	(r328535)
+++ head/stand/powerpc/kboot/host_syscall.S	Mon Jan 29 09:24:28 2018	(r328536)
@@ -14,7 +14,6 @@ ENTRY(host_read)
 	li %r3, 0
 	blr
 
-
 ENTRY(host_write)
 	li %r0, 4 # SYS_write
 	sc
@@ -28,6 +27,11 @@ ENTRY(host_seek)
 	sc
 	blr
 
+ENTRY(host_llseek)	/* C prototype: host_llseek() in host_syscall.h */
+	li %r0, 140 # SYS_llseek
+	sc	/* only %r0 is set here; the argument registers are left untouched */
+	blr	/* NOTE(review): result is returned unchecked; hostdisk.c tests "< 0" -- confirm errors come back negative */
+
 ENTRY(host_open)
 	li %r0, 5 # SYS_open
 	sc
@@ -44,6 +48,11 @@ ENTRY(host_close)
 
 ENTRY(host_mmap)
 	li %r0, 90 # SYS_mmap
+	sc
+	blr
+
+ENTRY(host_uname)	/* C prototype: host_uname() in host_syscall.h */
+	li %r0, 122 # SYS_uname
 	sc
 	blr
 

Modified: head/stand/powerpc/kboot/host_syscall.h
==============================================================================
--- head/stand/powerpc/kboot/host_syscall.h	Mon Jan 29 09:21:08 2018	(r328535)
+++ head/stand/powerpc/kboot/host_syscall.h	Mon Jan 29 09:24:28 2018	(r328536)
@@ -34,9 +34,18 @@ ssize_t host_read(int fd, void *buf, size_t nbyte);
 ssize_t host_write(int fd, const void *buf, size_t nbyte);
 ssize_t host_seek(int fd, int64_t offset, int whence);
 int host_open(const char *path, int flags, int mode);
+ssize_t host_llseek(int fd, int32_t offset_high, int32_t offset_lo, uint64_t *result, int whence);
 int host_close(int fd);
 void *host_mmap(void *addr, size_t len, int prot, int flags, int fd, int);
 #define host_getmem(size) host_mmap(0, size, 3 /* RW */, 0x22 /* ANON */, -1, 0);
+struct old_utsname {	/* buffer the host kernel fills in for host_uname() */
+	char sysname[65];
+	char nodename[65];
+	char release[65];
+	char version[65];
+	char machine[65], domainname[65]; /* Linux __NR_uname also writes domainname */
+};
+int host_uname(struct old_utsname *);
 struct host_timeval {
 	int tv_sec;
 	int tv_usec;
@@ -44,8 +53,8 @@ struct host_timeval {
 int host_gettimeofday(struct host_timeval *a, void *b);
 int host_select(int nfds, long *readfds, long *writefds, long *exceptfds,
     struct host_timeval *timeout);
-int kexec_load(vm_offset_t start, int nsegs, void *segs);
-int host_reboot(int, int, int, void *);
+int kexec_load(uint32_t start, int nsegs, uint32_t segs);
+int host_reboot(int, int, int, uint32_t);
 int host_getdents(int fd, void *dirp, int count);
 
 #endif

Modified: head/stand/powerpc/kboot/hostdisk.c
==============================================================================
--- head/stand/powerpc/kboot/hostdisk.c	Mon Jan 29 09:21:08 2018	(r328535)
+++ head/stand/powerpc/kboot/hostdisk.c	Mon Jan 29 09:24:28 2018	(r328536)
@@ -64,10 +64,14 @@ hostdisk_strategy(void *devdata, int flag, daddr_t dbl
 	struct devdesc *desc = devdata;
 	daddr_t pos;
 	int n;
-	
+	uint64_t res;
+	uint32_t posl, posh;
+
 	pos = dblk * 512;
 
-	if (host_seek(desc->d_unit, pos, 0) < 0) {
+	posl = pos & 0xffffffff;
+	posh = (pos >> 32) & 0xffffffff;
+	if (host_llseek(desc->d_unit, posh, posl, &res, 0) < 0) {
 		printf("Seek error\n");
 		return (EIO);
 	}

Modified: head/stand/powerpc/kboot/kerneltramp.S
==============================================================================
--- head/stand/powerpc/kboot/kerneltramp.S	Mon Jan 29 09:21:08 2018	(r328535)
+++ head/stand/powerpc/kboot/kerneltramp.S	Mon Jan 29 09:24:28 2018	(r328536)
@@ -20,6 +20,18 @@ CNAME(kerneltramp):
 	bl 2f
 	.space 24	/* branch address, r3-r7 */
 
+/*
+ * MUST BE IN SYNC WITH:
+ *  struct trampoline_data {
+ *   uint32_t	kernel_entry;
+ *   uint32_t	dtb;
+ *   uint32_t	phys_mem_offset;
+ *   uint32_t	of_entry;
+ *   uint32_t	mdp;
+ *   uint32_t	mdp_size;
+ *  };
+ */
+
 . = kerneltramp + 0x40	/* AP spinlock */
 	.long 0
 
@@ -36,18 +48,53 @@ CNAME(kerneltramp):
 	sync
 	ba	0x100
 
-2:			/* Continuation of kerneltramp */
+2:	/* Continuation of kerneltramp */
 	mflr	%r8
 	mtlr	%r9
-	lwz	%r3,0(%r8)
-	mtctr	%r3
+
+	mfmsr	%r10
+	andi.	%r10, %r10, 1	/* test MSR_LE */
+	bne	little_endian
+
+/* We're starting in BE */
+big_endian:
 	lwz	%r3,4(%r8)
 	lwz	%r4,8(%r8)
 	lwz	%r5,12(%r8)
 	lwz	%r6,16(%r8)
 	lwz	%r7,20(%r8)
+
+	lwz	%r10, 0(%r8)
+	mtctr	%r10
 	bctr
-	
+
+/* We're starting in LE */
+little_endian:
+
+	/* Entries are BE, swap them during load. */
+	li	%r10, 4
+	lwbrx	%r3, %r8, %r10
+	li	%r10, 8
+	lwbrx	%r4, %r8, %r10
+	li	%r10, 12
+	lwbrx	%r5, %r8, %r10
+	li	%r10, 16
+	lwbrx	%r6, %r8, %r10
+	li	%r10, 20
+	lwbrx	%r7, %r8, %r10
+
+	/* Clear MSR_LE flag to enter the BE world */
+	mfmsr	%r10
+	clrrdi	%r10, %r10, 1
+	mtsrr1	%r10
+
+	/* Entry is at 0(%r8) */
+	li	%r10, 0
+	lwbrx	%r10, %r8, %r10
+	mtsrr0	%r10
+
+	rfid
+
 endkerneltramp:
 
 	.data

Modified: head/stand/powerpc/kboot/main.c
==============================================================================
--- head/stand/powerpc/kboot/main.c	Mon Jan 29 09:21:08 2018	(r328535)
+++ head/stand/powerpc/kboot/main.c	Mon Jan 29 09:24:28 2018	(r328536)
@@ -27,6 +27,7 @@
 __FBSDID("$FreeBSD$");
 
 #include <stand.h>
+#include <sys/endian.h>
 #include <sys/param.h>
 #include <fdt_platform.h>
 
@@ -35,6 +36,7 @@ __FBSDID("$FreeBSD$");
 #include "bootstrap.h"
 #include "host_syscall.h"
 
+
 struct arch_switch	archsw;
 extern void *_end;
 
@@ -47,9 +49,170 @@ ssize_t kboot_readin(const int fd, vm_offset_t dest, c
 int kboot_autoload(void);
 uint64_t kboot_loadaddr(u_int type, void *data, uint64_t addr);
 int kboot_setcurrdev(struct env_var *ev, int flags, const void *value);
+static void kboot_kseg_get(int *nseg, void **ptr);
 
 extern int command_fdt_internal(int argc, char *argv[]);
 
+struct region_desc {	/* one physical address range that must not be loaded into */
+	uint64_t start;	/* first address of the range */
+	uint64_t end;	/* last address of the range (inclusive) */
+};
+
+/* Find (once) the first free, 64MB-aligned physical load address; 0 if none. */
+static uint64_t
+kboot_get_phys_load_segment(void)
+{
+	int fd;
+	uint64_t entry[2];
+	static uint64_t load_segment = ~(0UL);
+	uint64_t val_64;
+	uint32_t val_32;
+	struct region_desc rsvd_reg[32];
+	int rsvd_reg_cnt = 0;
+	int ret, a, b;
+	uint64_t start, end;
+
+	if (load_segment == ~(0UL)) {
+		/* Default load address is 0x00000000 */
+		load_segment = 0UL;
+
+		/* Read reserved regions; keep 3 slots for the entries added below */
+		fd = host_open("/proc/device-tree/reserved-ranges", O_RDONLY, 0);
+		if (fd >= 0) {
+			while (rsvd_reg_cnt < (int)nitems(rsvd_reg) - 3 &&
+			    host_read(fd, &entry[0], sizeof(entry)) == sizeof(entry)) {
+				rsvd_reg[rsvd_reg_cnt].start = be64toh(entry[0]);
+				rsvd_reg[rsvd_reg_cnt].end =
+				    be64toh(entry[1]) + rsvd_reg[rsvd_reg_cnt].start - 1;
+				rsvd_reg_cnt++;
+			}
+			host_close(fd);
+		}
+		/* Read where the kernel ends (the property may be 64 or 32 bits wide) */
+		fd = host_open("/proc/device-tree/chosen/linux,kernel-end", O_RDONLY, 0);
+		if (fd >= 0) {
+			ret = host_read(fd, &val_64, sizeof(val_64));
+
+			if (ret == sizeof(uint64_t)) {
+				rsvd_reg[rsvd_reg_cnt].start = 0;
+				rsvd_reg[rsvd_reg_cnt].end = be64toh(val_64) - 1;
+			} else {
+				memcpy(&val_32, &val_64, sizeof(val_32));
+				rsvd_reg[rsvd_reg_cnt].start = 0;
+				rsvd_reg[rsvd_reg_cnt].end = be32toh(val_32) - 1;
+			}
+			rsvd_reg_cnt++;
+
+			host_close(fd);
+		}
+		/* Read memory size (SOCKET0 only) */
+		fd = host_open("/proc/device-tree/memory@0/reg", O_RDONLY, 0);
+		if (fd < 0)
+			fd = host_open("/proc/device-tree/memory/reg", O_RDONLY, 0);
+		if (fd >= 0) {
+			ret = host_read(fd, &entry, sizeof(entry));
+
+			/* Memory range in start:length format */
+			entry[0] = be64toh(entry[0]);
+			entry[1] = be64toh(entry[1]);
+
+			/* Reserve everything that lies before start */
+			if (entry[0] != 0) {
+				rsvd_reg[rsvd_reg_cnt].start = 0;
+				rsvd_reg[rsvd_reg_cnt].end = entry[0] - 1;
+				rsvd_reg_cnt++;
+			}
+			/* Reserve everything that lies after end */
+			if (entry[1] != 0xffffffffffffffffUL) {
+				rsvd_reg[rsvd_reg_cnt].start = entry[0] + entry[1];
+				rsvd_reg[rsvd_reg_cnt].end = 0xffffffffffffffffUL;
+				rsvd_reg_cnt++;
+			}
+
+			host_close(fd);
+		}
+
+		/* Sort entries in ascending order (bubble) */
+		for (a = rsvd_reg_cnt - 1; a > 0; a--) {
+			for (b = 0; b < a; b++) {
+				if (rsvd_reg[b].start > rsvd_reg[b + 1].start) {
+					struct region_desc tmp;
+					tmp = rsvd_reg[b];
+					rsvd_reg[b] = rsvd_reg[b + 1];
+					rsvd_reg[b + 1] = tmp;
+				}
+			}
+		}
+
+		/* Join overlapping/adjacent regions (the list is sorted by start) */
+		for (a = 0; a < rsvd_reg_cnt - 1; ) {
+			if ((rsvd_reg[a + 1].start <= rsvd_reg[a].end) ||
+			    ((rsvd_reg[a + 1].start - 1) == rsvd_reg[a].end)) {
+				/* Merge a + 1 into a; "<=" first so start == 0 cannot wrap */
+				rsvd_reg[a].end =
+				    MAX(rsvd_reg[a].end, rsvd_reg[a + 1].end);
+
+				for (b = a + 1; b < rsvd_reg_cnt - 1; b++)
+					rsvd_reg[b] = rsvd_reg[b + 1];
+				rsvd_reg_cnt--;
+			} else
+				a++;
+		}
+
+		/* Find the first free region */
+		if (rsvd_reg_cnt > 0) {
+			start = 0;
+			end = rsvd_reg[0].start;
+			for (a = 0; a < rsvd_reg_cnt - 1; a++) {
+				if ((start >= rsvd_reg[a].start) &&
+				    (start <= rsvd_reg[a].end)) {
+					start = rsvd_reg[a].end + 1;
+					end = rsvd_reg[a + 1].start;
+				} else
+					break;
+			}
+
+			if (start != end) {
+				uint64_t align = 64UL*1024UL*1024UL;
+
+				/* Align both to 64MB boundary */
+				start = (start + align - 1UL) & ~(align - 1UL);
+				end = ((end + 1UL) & ~(align - 1UL)) - 1UL;
+
+				if (start < end)
+					load_segment = start;
+			}
+		}
+	}
+
+	return (load_segment);
+}
+
+/* Report 64 if the host's uname machine is ppc64/ppc64le, else 32 (cached). */
+uint8_t
+kboot_get_kernel_machine_bits(void)
+{
+	static uint8_t cached_bits = 0;
+	struct old_utsname uts;
+
+	/* Zero means "not probed yet"; later calls reuse the cached answer. */
+	if (cached_bits != 0)
+		return (cached_bits);
+
+	/* Assume a 32-bit kernel unless uname reports a 64-bit machine. */
+	cached_bits = 32;
+
+	memset(&uts, 0, sizeof(uts));
+	if (host_uname(&uts) != 0)
+		return (cached_bits);
+
+	if (strcmp(uts.machine, "ppc64") == 0 ||
+	    strcmp(uts.machine, "ppc64le") == 0)
+		cached_bits = 64;
+
+	return (cached_bits);
+}
+
 int
 kboot_getdev(void **vdev, const char *devspec, const char **path)
 {
@@ -94,7 +257,7 @@ main(int argc, const char **argv)
 {
 	void *heapbase;
 	const size_t heapsize = 15*1024*1024;
-	const char *bootdev = argv[1];
+	const char *bootdev;
 
 	/*
 	 * Set the heap to one page after the end of the loader.
@@ -107,6 +270,12 @@ main(int argc, const char **argv)
 	 */
 	cons_probe();
 
+	/* Choose bootdev if provided */
+	if (argc > 1)
+		bootdev = argv[1];
+	else
+		bootdev = "";
+
 	printf("Boot device: %s\n", bootdev);
 
 	archsw.arch_getdev = kboot_getdev;
@@ -115,6 +284,7 @@ main(int argc, const char **argv)
 	archsw.arch_readin = kboot_readin;
 	archsw.arch_autoload = kboot_autoload;
 	archsw.arch_loadaddr = kboot_loadaddr;
+	archsw.arch_kexec_kseg_get = kboot_kseg_get;
 
 	printf("\n%s", bootprog_info);
 
@@ -181,7 +351,7 @@ static ssize_t
 get_phys_buffer(vm_offset_t dest, const size_t len, void **buf)
 {
 	int i = 0;
-	const size_t segsize = 2*1024*1024;
+	const size_t segsize = 4*1024*1024;
 
 	for (i = 0; i < nkexec_segments; i++) {
 		if (dest >= (vm_offset_t)loaded_segments[i].mem &&
@@ -194,6 +364,7 @@ get_phys_buffer(vm_offset_t dest, const size_t len, vo
 	loaded_segments[nkexec_segments].bufsz = segsize;
 	loaded_segments[nkexec_segments].mem = (void *)rounddown2(dest,segsize);
 	loaded_segments[nkexec_segments].memsz = segsize;
+
 	i = nkexec_segments;
 	nkexec_segments++;
 
@@ -283,17 +454,32 @@ kboot_autoload(void)
 uint64_t
 kboot_loadaddr(u_int type, void *data, uint64_t addr)
 {
-	/*
-	 * Need to stay out of the way of Linux. /chosen/linux,kernel-end does
-	 * a better job here, but use a fixed offset for now.
-	 */
 
 	if (type == LOAD_ELF)
 		addr = roundup(addr, PAGE_SIZE);
 	else
-		addr += 64*1024*1024; /* Stay out of the way of Linux */
+		addr += kboot_get_phys_load_segment();
 
 	return (addr);
+}
+
+static void
+kboot_kseg_get(int *nseg, void **ptr)
+{
+#if 0	/* debug aid: dump every recorded kexec segment */
+	int a;
+
+	for (a = 0; a < nkexec_segments; a++) {
+		printf("kseg_get: %jx %jx %jx %jx\n",
+			(uintmax_t)loaded_segments[a].buf,
+			(uintmax_t)loaded_segments[a].bufsz,
+			(uintmax_t)loaded_segments[a].mem,
+			(uintmax_t)loaded_segments[a].memsz);
+	}
+#endif
+	/* archsw.arch_kexec_kseg_get hook: report the segment count and table. */
+	*nseg = nkexec_segments;
+	*ptr = &loaded_segments[0];
 }
 
 void

Modified: head/stand/powerpc/kboot/metadata.c
==============================================================================
--- head/stand/powerpc/kboot/metadata.c	Mon Jan 29 09:21:08 2018	(r328535)
+++ head/stand/powerpc/kboot/metadata.c	Mon Jan 29 09:24:28 2018	(r328536)
@@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$");
 
 #include <stand.h>
 #include <sys/param.h>
+#include <sys/endian.h>
 #include <sys/reboot.h>
 #include <sys/linker.h>
 #include <sys/boot.h>
@@ -157,7 +158,7 @@ md_copyenv(vm_offset_t addr)
 static int align;
 
 #define COPY32(v, a, c) {			\
-    u_int32_t	x = (v);			\
+    u_int32_t	x = htobe32(v);			\
     if (c)					\
         archsw.arch_copyin(&x, a, sizeof(x));	\
     a += sizeof(x);				\
@@ -254,11 +255,12 @@ md_load_dual(char *args, vm_offset_t *modulep, vm_offs
     vm_offset_t			fdtp;
     vm_offset_t			size;
     uint64_t			scratch64;
+    uint32_t			scratch32;
     char			*rootdevname;
     int				howto;
 
     align = kern64 ? 8 : 4;
-    howto = md_getboothowto(args);
+    howto = htobe32(md_getboothowto(args));
 
     /* 
      * Allow the environment variable 'rootdev' to override the supplied device 
@@ -300,16 +302,19 @@ md_load_dual(char *args, vm_offset_t *modulep, vm_offs
 	panic("can't find kernel file");
     file_addmetadata(kfp, MODINFOMD_HOWTO, sizeof howto, &howto);
     if (kern64) {
-	scratch64 = envp;
+	scratch64 = htobe64(envp);
 	file_addmetadata(kfp, MODINFOMD_ENVP, sizeof scratch64, &scratch64);
-	scratch64 = fdtp;
+	scratch64 = htobe64(fdtp);
 	file_addmetadata(kfp, MODINFOMD_DTBP, sizeof scratch64, &scratch64);
-	scratch64 = kernend;
+	scratch64 = htobe64(kernend);
 	file_addmetadata(kfp, MODINFOMD_KERNEND, sizeof scratch64, &scratch64);
     } else {
-	file_addmetadata(kfp, MODINFOMD_ENVP, sizeof envp, &envp);
-	file_addmetadata(kfp, MODINFOMD_DTBP, sizeof fdtp, &fdtp);
-	file_addmetadata(kfp, MODINFOMD_KERNEND, sizeof kernend, &kernend);
+	scratch32 = htobe32(envp);
+	file_addmetadata(kfp, MODINFOMD_ENVP, sizeof scratch32, &scratch32);
+	scratch32 = htobe32(fdtp);
+	file_addmetadata(kfp, MODINFOMD_DTBP, sizeof scratch32, &scratch32);
+	scratch32 = htobe32(kernend);
+	file_addmetadata(kfp, MODINFOMD_KERNEND, sizeof scratch32, &scratch32);
     }
 
     *modulep = addr;
@@ -318,7 +323,7 @@ md_load_dual(char *args, vm_offset_t *modulep, vm_offs
 
     md = file_findmetadata(kfp, MODINFOMD_KERNEND);
     if (kern64) {
-	scratch64 = kernend;
+	scratch64 = htobe64(kernend);
 	bcopy(&scratch64, md->md_data, sizeof scratch64);
     } else {
 	bcopy(&kernend, md->md_data, sizeof kernend);

Modified: head/stand/powerpc/kboot/ppc64_elf_freebsd.c
==============================================================================
--- head/stand/powerpc/kboot/ppc64_elf_freebsd.c	Mon Jan 29 09:21:08 2018	(r328535)
+++ head/stand/powerpc/kboot/ppc64_elf_freebsd.c	Mon Jan 29 09:24:28 2018	(r328536)
@@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$");
 #define __ELF_WORD_SIZE 64
 
 #include <sys/param.h>
+#include <sys/endian.h>
 #include <sys/linker.h>
 
 #include <machine/metadata.h>
@@ -43,9 +44,16 @@ __FBSDID("$FreeBSD$");
 extern char		end[];
 extern void		*kerneltramp;
 extern size_t		szkerneltramp;
-extern int		nkexec_segments;
-extern void *		loaded_segments;
 
+struct trampoline_data {	/* at trampoline base + 8; every field is stored big-endian */
+	uint32_t	kernel_entry;	/* +0: address kerneltramp.S jumps to */
+	uint32_t	dtb;		/* +4: loaded into %r3 by kerneltramp.S */
+	uint32_t	phys_mem_offset; /* +8: loaded into %r4 */
+	uint32_t	of_entry;	/* +12: loaded into %r5 */
+	uint32_t	mdp;		/* +16: loaded into %r6 */
+	uint32_t	mdp_size;	/* +20: loaded into %r7 */
+};
+
 vm_offset_t md_load64(char *args, vm_offset_t *modulep, vm_offset_t *dtb);
 
 int
@@ -70,53 +78,90 @@ ppc64_elf_exec(struct preloaded_file *fp)
 	int			error;
 	uint32_t		*trampoline;
 	uint64_t		entry;
-	vm_offset_t		trampolinebase;
+	uint64_t		trampolinebase;
+	struct trampoline_data	*trampoline_data;
+	int			nseg;
+	void			*kseg;
 
 	if ((fmp = file_findmetadata(fp, MODINFOMD_ELFHDR)) == NULL) {
 		return(EFTYPE);
 	}
 	e = (Elf_Ehdr *)&fmp->md_data;
 
-	/* Figure out where to put it */
+	/*
+	 * Figure out where to put it.
+	 *
+	 * Linux does not allow to do kexec_load into
+	 * any part of memory. Ask arch_loadaddr to
+	 * resolve the first available chunk of physical
+	 * memory where loading is possible (load_addr).
+	 *
+	 * Memory organization is shown below.
+	 * It is assumed, that text segment offset of
+	 * kernel ELF (KERNPHYSADDR) is non-zero,
+	 * which is true for PPC/PPC64 architectures,
+	 * where default is 0x100000.
+	 *
+	 * load_addr:                 trampoline code
+	 * load_addr + KERNPHYSADDR:  kernel text segment
+	 */
 	trampolinebase = archsw.arch_loadaddr(LOAD_RAW, NULL, 0);
-	
+	printf("Load address at %#jx\n", (uintmax_t)trampolinebase);
+	printf("Relocation offset is %#jx\n", (uintmax_t)elf64_relocation_offset);
+
 	/* Set up loader trampoline */
 	trampoline = malloc(szkerneltramp);
 	memcpy(trampoline, &kerneltramp, szkerneltramp);
+
 	/* Parse function descriptor for ELFv1 kernels */
 	if ((e->e_flags & 3) == 2)
 		entry = e->e_entry;
-	else
+	else {
 		archsw.arch_copyout(e->e_entry + elf64_relocation_offset,
 		    &entry, 8);
-	trampoline[2] = entry + elf64_relocation_offset;
-	trampoline[4] = 0; /* Phys. mem offset */
-	trampoline[5] = 0; /* OF entry point */
+		entry = be64toh(entry);
+	}
 
+	/*
+	 * Placeholder for trampoline data is at trampolinebase + 0x08
+	 * CAUTION: all data must be Big Endian
+	 */
+	trampoline_data = (void*)&trampoline[2];
+	trampoline_data->kernel_entry = htobe32(entry + elf64_relocation_offset);
+	trampoline_data->phys_mem_offset = htobe32(0);
+	trampoline_data->of_entry = htobe32(0);
+
 	if ((error = md_load64(fp->f_args, &mdp, &dtb)) != 0)
 		return (error);
 
-	trampoline[3] = dtb;
-	trampoline[6] = mdp;
-	trampoline[7] = 0xfb5d104d;
-	printf("Kernel entry at %#jx (%#x) ...\n", e->e_entry, trampoline[2]);
-	printf("DTB at %#x, mdp at %#x\n", dtb, mdp);
+	trampoline_data->dtb = htobe32(dtb);
+	trampoline_data->mdp = htobe32(mdp);
+	trampoline_data->mdp_size = htobe32(0xfb5d104d);
 
+	printf("Kernel entry at %#jx (%#x) ...\n",
+	    entry, be32toh(trampoline_data->kernel_entry));
+	printf("DTB at %#x, mdp at %#x\n",
+	    be32toh(trampoline_data->dtb), be32toh(trampoline_data->mdp));
+
 	dev_cleanup();
 
 	archsw.arch_copyin(trampoline, trampolinebase, szkerneltramp);
 	free(trampoline);
 
-	error = kexec_load(trampolinebase, nkexec_segments, &loaded_segments);
+	if (archsw.arch_kexec_kseg_get == NULL)
+		panic("architecture did not provide kexec segment mapping");
+	archsw.arch_kexec_kseg_get(&nseg, &kseg);
+
+	error = kexec_load(trampolinebase, nseg, (uintptr_t)kseg);
 	if (error != 0)
 		panic("kexec_load returned error: %d", error);
+
 	error = host_reboot(0xfee1dead, 672274793,
-	    0x45584543 /* LINUX_REBOOT_CMD_KEXEC */, NULL);
+	    0x45584543 /* LINUX_REBOOT_CMD_KEXEC */, (uintptr_t)NULL);
 	if (error != 0)
 		panic("reboot returned error: %d", error);
-	while (1) {}
 
-	panic("exec returned");
+	while (1) {}
 }
 
 struct file_format	ppc_elf64 =



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201801290924.w0T9OSix008403>