From owner-freebsd-i386@FreeBSD.ORG Fri Apr 11 08:52:41 2008 Return-Path: Delivered-To: freebsd-i386@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id EDB36106566C for ; Fri, 11 Apr 2008 08:52:41 +0000 (UTC) (envelope-from mjl@luckie.org.nz) Received: from mailfilter14.ihug.co.nz (mailfilter14.ihug.co.nz [203.109.136.14]) by mx1.freebsd.org (Postfix) with ESMTP id 749C58FC1D for ; Fri, 11 Apr 2008 08:52:41 +0000 (UTC) (envelope-from mjl@luckie.org.nz) X-IronPort-Anti-Spam-Filtered: true X-IronPort-Anti-Spam-Result: AgoFALDB/kd2XAcy/2dsb2JhbACBXapD X-IronPort-AV: E=Sophos;i="4.25,640,1199617200"; d="scan'208";a="75476751" Ironport-Content-Filter: send-to-smtp Ironport-OCF: send-to-smtp Received: from 118-92-7-50.dsl.dyn.ihug.co.nz (HELO spandex.luckie.org.nz) ([118.92.7.50]) by smtp.mailfilter4.ihug.co.nz with ESMTP/TLS/DHE-RSA-AES256-SHA; 11 Apr 2008 20:42:34 +1200 Received: from rayon.luckie.org.nz ([192.168.1.25]) by spandex.luckie.org.nz with esmtpsa (TLSv1:AES256-SHA:256) (Exim 4.69 (FreeBSD)) (envelope-from ) id 1JkEq8-0007Vz-4j for freebsd-i386@freebsd.org; Fri, 11 Apr 2008 20:42:32 +1200 Message-ID: <47FF2478.2050803@luckie.org.nz> Date: Fri, 11 Apr 2008 20:42:32 +1200 From: Matthew Luckie User-Agent: Thunderbird 2.0.0.9 (X11/20080129) MIME-Version: 1.0 To: freebsd-i386@freebsd.org Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Subject: i386/bpf_jit_machdep X-BeenThere: freebsd-i386@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: I386-specific issues for FreeBSD List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 11 Apr 2008 08:52:42 -0000 Here is a patch to the BPF JIT compiler, which I am sending here first to be reviewed by people who have a good chance of being able to spot any flaws. I'm inexperienced in i386 assembly. The things I have tried to achieve with the patch are as follows: - put a return zero target at the end of the procedure, rather than encode it in each BPF op code that needs it. My thought on this is that it seems likely that the length check will always work out, so it is better to put the unlikely result as the jump and fall through to what is most likely to occur. - BPF jump instructions are likely to have one fall through jump target. so don't encode both jump instructions. Review, please. --- /sys/i386/i386/bpf_jit_machdep.h.orig 2005-12-07 09:11:07.000000000 +1300 +++ /sys/i386/i386/bpf_jit_machdep.h 2008-04-11 19:08:40.000000000 +1200 @@ -78,6 +78,13 @@ u_int *refs; } bpf_bin_stream; +#define COMPUTE_TARGETS(stream, ins, jt, jf) do { \ + jt = stream.refs[stream.bpf_pc + ins->jt] - \ + stream.refs[stream.bpf_pc]; \ + jf = stream.refs[stream.bpf_pc + ins->jf] - \ + stream.refs[stream.bpf_pc]; \ +} while (0) + /* * Prototype of the emit functions. * @@ -330,6 +337,13 @@ } while (0) /* jne off32 */ +#define JNE(off32) do { \ + emitm(&stream, 0x0f, 1); \ + emitm(&stream, 0x85, 1); \ + emitm(&stream, off32, 4); \ +} while (0) + +/* jne off8 */ #define JNEb(off8) do { \ emitm(&stream, 0x75, 1); \ emitm(&stream, off8, 1); \ @@ -369,6 +383,27 @@ emitm(&stream, off32, 4); \ } while (0) +/* jb off32 */ +#define JB(off32) do { \ + emitm(&stream, 0x0f, 1); \ + emitm(&stream, 0x82, 1); \ + emitm(&stream, off32, 4); \ +} while (0) + +/* jl off32 */ +#define JL(off32) do { \ + emitm(&stream, 0x0f, 1); \ + emitm(&stream, 0x8c, 1); \ + emitm(&stream, off32, 4); \ +} while (0) + +/* jbe off32 */ +#define JBE(off32) do {\ + emitm(&stream, 0x0f, 1); \ + emitm(&stream, 0x86, 1); \ + emitm(&stream, off32, 4); \ +} while (0) + /* jg off32 */ #define JG(off32) do { \ emitm(&stream, 0x0f, 1); \ --- /sys/i386/i386/bpf_jit_machdep.c.orig 2006-01-04 09:26:02.000000000 +1300 +++ /sys/i386/i386/bpf_jit_machdep.c 2008-04-11 20:04:24.000000000 +1200 @@ -94,7 +94,7 @@ bpf_jit_compile(struct bpf_insn *prog, u_int nins, int *mem) { struct bpf_insn *ins; - u_int i, pass; + u_int i, pass, jt, jf; bpf_bin_stream stream; /* @@ -108,13 +108,13 @@ return NULL; /* Allocate the reference table for the jumps */ - stream.refs = (u_int *)malloc((nins + 1) * sizeof(u_int), + stream.refs = (u_int *)malloc((nins + 2) * sizeof(u_int), M_BPFJIT, M_NOWAIT); if (stream.refs == NULL) return NULL; /* Reset the reference table */ - for (i = 0; i < nins + 1; i++) + for (i = 0; i < nins + 2; i++) stream.refs[i] = 0; stream.cur_ip = 0; @@ -165,12 +165,8 @@ MOVrd(ESI, ECX); ADDib(ECX, sizeof(int)); CMPodd(ECX, EBP, 0x10); - JLEb(7); - ZERO_EAX(); - POP(EBX); - POP(ESI); - POP(EDI); - LEAVE_RET(); + JG(stream.refs[nins] - + stream.refs[stream.bpf_pc] + 5); MOVobd(EAX, EBX, ESI); BSWAP(EAX); break; @@ -181,11 +177,8 @@ MOVrd(ESI, ECX); ADDib(ECX, sizeof(short)); CMPodd(ECX, EBP, 0x10); - JLEb(5); - POP(EBX); - POP(ESI); - POP(EDI); - LEAVE_RET(); + JG(stream.refs[nins] - + stream.refs[stream.bpf_pc] + 6); MOVobw(AX, EBX, ESI); SWAP_AX(); break; @@ -194,11 +187,8 @@ ZERO_EAX(); MOVid(ECX, ins->k); CMPodd(ECX, EBP, 0x10); - JLEb(5); - POP(EBX); - POP(ESI); - POP(EDI); - LEAVE_RET(); + JG(stream.refs[nins] - + stream.refs[stream.bpf_pc] + 3); MOVobb(AL, EBX, ECX); break; @@ -216,12 +206,8 @@ MOVrd(ESI, ECX); ADDib(ECX, sizeof(int)); CMPodd(ECX, EBP, 0x10); - JLEb(7); - ZERO_EAX(); - POP(EBX); - POP(ESI); - POP(EDI); - LEAVE_RET(); + JG(stream.refs[nins] - + stream.refs[stream.bpf_pc] + 5); MOVobd(EAX, EBX, ESI); BSWAP(EAX); break; @@ -233,11 +219,8 @@ MOVrd(ESI, ECX); ADDib(ECX, sizeof(short)); CMPodd(ECX, EBP, 0x10); - JLEb(5); - POP(EBX); - POP(ESI); - POP(EDI); - LEAVE_RET(); + JG(stream.refs[nins] - + stream.refs[stream.bpf_pc] + 6); MOVobw(AX, EBX, ESI); SWAP_AX(); break; @@ -247,23 +230,16 @@ MOVid(ECX, ins->k); ADDrd(ECX, EDX); CMPodd(ECX, EBP, 0x10); - JLEb(5); - POP(EBX); - POP(ESI); - POP(EDI); - LEAVE_RET(); + JG(stream.refs[nins] - + stream.refs[stream.bpf_pc] + 3); MOVobb(AL, EBX, ECX); break; case BPF_LDX|BPF_MSH|BPF_B: MOVid(ECX, ins->k); CMPodd(ECX, EBP, 0x10); - JLEb(7); - ZERO_EAX(); - POP(EBX); - POP(ESI); - POP(EDI); - LEAVE_RET(); + JG(stream.refs[nins] - + stream.refs[stream.bpf_pc] + 11); ZERO_EDX(); MOVobb(DL, EBX, ECX); ANDib(DL, 0xf); @@ -313,70 +289,109 @@ break; case BPF_JMP|BPF_JGT|BPF_K: + COMPUTE_TARGETS(stream, ins, jt, jf); CMPid(EAX, ins->k); - /* 5 is the size of the following JMP */ - JG(stream.refs[stream.bpf_pc + ins->jt] - - stream.refs[stream.bpf_pc] + 5 ); - JMP(stream.refs[stream.bpf_pc + ins->jf] - - stream.refs[stream.bpf_pc]); + if(ins->jt == 0 && ins->jf != 0) + JLE(jf); + else if(ins->jt != 0 && ins->jf == 0) + JG(jt); + else { + JG(jt + 5); + JMP(jf); + } break; case BPF_JMP|BPF_JGE|BPF_K: + COMPUTE_TARGETS(stream, ins, jt, jf); CMPid(EAX, ins->k); - JGE(stream.refs[stream.bpf_pc + ins->jt] - - stream.refs[stream.bpf_pc] + 5); - JMP(stream.refs[stream.bpf_pc + ins->jf] - - stream.refs[stream.bpf_pc]); + if(ins->jt == 0 && ins->jf != 0) + JL(jf); + else if(ins->jt != 0 && ins->jf == 0) + JGE(jt); + else { + JGE(jt + 5); + JMP(jf); + } break; case BPF_JMP|BPF_JEQ|BPF_K: + COMPUTE_TARGETS(stream, ins, jt, jf); CMPid(EAX, ins->k); - JE(stream.refs[stream.bpf_pc + ins->jt] - - stream.refs[stream.bpf_pc] + 5); - JMP(stream.refs[stream.bpf_pc + ins->jf] - - stream.refs[stream.bpf_pc]); + if(ins->jt == 0 && ins->jf != 0) + JNE(jf); + else if(ins->jt != 0 && ins->jf == 0) + JE(jt); + else { + JE(jt + 5); + JMP(jf); + } break; case BPF_JMP|BPF_JSET|BPF_K: + COMPUTE_TARGETS(stream, ins, jt, jf); MOVrd(ECX, EAX); ANDid(ECX, ins->k); - JE(stream.refs[stream.bpf_pc + ins->jf] - - stream.refs[stream.bpf_pc] + 5); - JMP(stream.refs[stream.bpf_pc + ins->jt] - - stream.refs[stream.bpf_pc]); + if(ins->jt == 0 && ins->jf != 0) + JE(jf); + else if(ins->jt != 0 && ins->jf == 0) + JNE(jt); + else { + JE(jf + 5); + JMP(jt); + } break; case BPF_JMP|BPF_JGT|BPF_X: + COMPUTE_TARGETS(stream, ins, jt, jf); CMPrd(EAX, EDX); - JA(stream.refs[stream.bpf_pc + ins->jt] - - stream.refs[stream.bpf_pc] + 5); - JMP(stream.refs[stream.bpf_pc + ins->jf] - - stream.refs[stream.bpf_pc]); + if(ins->jt == 0 && ins->jf != 0) + JBE(jf); + else if(ins->jt != 0 && ins->jf == 0) + JA(jt); + else { + JA(jt + 5); + JMP(jf); + } break; case BPF_JMP|BPF_JGE|BPF_X: + COMPUTE_TARGETS(stream, ins, jt, jf); CMPrd(EAX, EDX); - JAE(stream.refs[stream.bpf_pc + ins->jt] - - stream.refs[stream.bpf_pc] + 5); - JMP(stream.refs[stream.bpf_pc + ins->jf] - - stream.refs[stream.bpf_pc]); + if(ins->jt == 0 && ins->jf != 0) + JB(jf); + else if(ins->jt != 0 && ins->jf == 0) + JAE(jt); + else { + JAE(jt + 5); + JMP(jf); + } break; case BPF_JMP|BPF_JEQ|BPF_X: + COMPUTE_TARGETS(stream, ins, jt, jf); CMPrd(EAX, EDX); - JE(stream.refs[stream.bpf_pc + ins->jt] - - stream.refs[stream.bpf_pc] + 5); - JMP(stream.refs[stream.bpf_pc + ins->jf] - - stream.refs[stream.bpf_pc]); + if(ins->jt == 0 && ins->jf != 0) + JNE(jf); + else if(ins->jt != 0 && ins->jf == 0) + JE(jt); + else { + JE(jt + 5); + JMP(jf); + } break; case BPF_JMP|BPF_JSET|BPF_X: + COMPUTE_TARGETS(stream, ins, jt, jf); MOVrd(ECX, EAX); ANDrd(ECX, EDX); - JE(stream.refs[stream.bpf_pc + ins->jf] - - stream.refs[stream.bpf_pc] + 5); - JMP(stream.refs[stream.bpf_pc + ins->jt] - - stream.refs[stream.bpf_pc]); + if(ins->jt == 0 && ins->jf != 0) + JE(jf); + else if(ins->jt != 0 && ins->jf == 0) + JNE(jt); + else { + JE(jf + 5); + JMP(jt); + } break; case BPF_ALU|BPF_ADD|BPF_X: @@ -395,12 +410,8 @@ case BPF_ALU|BPF_DIV|BPF_X: CMPid(EDX, 0); - JNEb(7); - ZERO_EAX(); - POP(EBX); - POP(ESI); - POP(EDI); - LEAVE_RET(); + JE(stream.refs[nins] - + stream.refs[stream.bpf_pc] + 8); MOVrd(ECX, EDX); ZERO_EDX(); DIVrd(ECX); @@ -479,6 +490,14 @@ ins++; } + /* procedure-wide return zero goes at the end */ + stream.bpf_pc++; + ZERO_EAX(); + POP(EBX); + POP(ESI); + POP(EDI); + LEAVE_RET(); + pass++; if (pass == 2) break; @@ -493,7 +512,7 @@ * modify the reference table to contain the offsets and * not the lengths of the instructions */ - for (i = 1; i < nins + 1; i++) + for (i = 1; i < nins + 2; i++) stream.refs[i] += stream.refs[i - 1]; /* Reset the counters */