Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 25 Oct 2014 20:51:41 +0300
From:      Konstantin Belousov <kostikbel@gmail.com>
To:        Mateusz Guzik <mjguzik@gmail.com>
Cc:        freebsd-arch@freebsd.org
Subject:   Re: syscalls from loadable modules compiled in statically into the kernel
Message-ID:  <20141025175141.GJ1877@kib.kiev.ua>
In-Reply-To: <20141025141557.GB20599@dft-labs.eu>
References:  <20141025022808.GA14551@dft-labs.eu> <20141025092234.GI1877@kib.kiev.ua> <20141025132039.GA20599@dft-labs.eu> <20141025141557.GB20599@dft-labs.eu>

next in thread | previous in thread | raw e-mail | index | archive | help
On Sat, Oct 25, 2014 at 04:15:57PM +0200, Mateusz Guzik wrote:
> On Sat, Oct 25, 2014 at 03:20:39PM +0200, Mateusz Guzik wrote:
> > On Sat, Oct 25, 2014 at 12:22:34PM +0300, Konstantin Belousov wrote:
> > > On Sat, Oct 25, 2014 at 04:28:09AM +0200, Mateusz Guzik wrote:
> > > > The kernel has the following mechanism:
> > > > 
> > > > int             
> > > > syscall_thread_enter(struct thread *td, struct sysent *se)
> > > > {               
> > > >         u_int32_t cnt, oldcnt;
> > > >                 
> > > >         do {            
> > > >                 oldcnt = se->sy_thrcnt;
> > > >                 if ((oldcnt & SY_THR_STATIC) != 0)
> > > >                         return (0);
> > > >                 if ((oldcnt & (SY_THR_DRAINING | SY_THR_ABSENT)) != 0)
> > > >                         return (ENOSYS);
> > > >                 cnt = oldcnt + SY_THR_INCR;
> > > >         } while (atomic_cmpset_acq_32(&se->sy_thrcnt, oldcnt, cnt) == 0);
> > > >         return (0);
> > > > }
> > > > 
> > > > Except it turns out that it is used even if given module (here: sysvshm) is
> > > > compiled in statically.
> > > > 
> > > > So my proposal is to give modules an easy way to tell whether they got
> > > > compiled in and extend syscall_register interface so that it would allow
> > > > registering static syscalls.
> > > > 
> > > > The latter could also be used by modules which are loadable, but don't
> > > > support unloads.
> > > > 
> > > > I don't have any good idea how to provide aforementioned detection
> > > > method though.
> > > The method would be a combination of some change to syscall_register()
> > > and #ifdef KLD_MODULE. Look at the sys/conf.h MAKEDEV_ETERNAL_KLD
> > > definition, which provides similar in spirit optimization for
> > > non-destructable cdevs.
> > > 
> > 
> > Ok, so I'll add sysctl_register_flags and SY_THR_STATIC_KLD + making
> > sure SY_THR_STATIC cannot be unregistered.
> > 
> 
> Turns out freebsd32 duplicates a lot of code and didn't receive some
> fixes regular syscall table support got. I decided to just patch it up
> without fixing that for now.
> 
> diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c
> index d909a71..8fe4b83 100644
> --- a/sys/compat/freebsd32/freebsd32_misc.c
> +++ b/sys/compat/freebsd32/freebsd32_misc.c
> @@ -2627,9 +2627,13 @@ freebsd32_xxx(struct thread *td, struct freebsd32_xxx_args *uap)
>  #endif
>  
>  int
> -syscall32_register(int *offset, struct sysent *new_sysent,
> -    struct sysent *old_sysent)
> +syscall32_register_flags(int *offset, struct sysent *new_sysent,
> +    struct sysent *old_sysent, int flags)
>  {
> +
> +	if ((flags & ~SY_THR_STATIC) != 0)
> +		return (EINVAL);
> +
>  	if (*offset == NO_SYSCALL) {
>  		int i;
>  
> @@ -2648,15 +2652,26 @@ syscall32_register(int *offset, struct sysent *new_sysent,
>  
>  	*old_sysent = freebsd32_sysent[*offset];
>  	freebsd32_sysent[*offset] = *new_sysent;
> +	atomic_store_rel_32(&freebsd32_sysent[*offset].sy_thrcnt, flags);
>  	return 0;
Fix style while you are there; the line should be return _(_0_)_;

>  }
>  
>  int
> +syscall32_register(int *offset, struct sysent *new_sysent,
> +    struct sysent *old_sysent)
> +{
> +
> +	return (syscall32_register_flags(offset, new_sysent, old_sysent, 0));
> +}
> +
> +int
>  syscall32_deregister(int *offset, struct sysent *old_sysent)
>  {
>  
> -	if (*offset)
> -		freebsd32_sysent[*offset] = *old_sysent;
> +	if (*offset == 0)
> +		return (0);
> +
> +	freebsd32_sysent[*offset] = *old_sysent;
>  	return 0;
>  }
>  
> @@ -2707,14 +2722,14 @@ syscall32_module_handler(struct module *mod, int what, void *arg)
>  }
>  
>  int
> -syscall32_helper_register(struct syscall_helper_data *sd)
> +syscall32_helper_register_flags(struct syscall_helper_data *sd, int flags)
>  {
>  	struct syscall_helper_data *sd1;
>  	int error;
>  
>  	for (sd1 = sd; sd1->syscall_no != NO_SYSCALL; sd1++) {
> -		error = syscall32_register(&sd1->syscall_no, &sd1->new_sysent,
> -		    &sd1->old_sysent);
> +		error = syscall32_register_flags(&sd1->syscall_no,
> +		    &sd1->new_sysent, &sd1->old_sysent, flags);
>  		if (error != 0) {
>  			syscall32_helper_unregister(sd);
>  			return (error);
> @@ -2725,6 +2740,13 @@ syscall32_helper_register(struct syscall_helper_data *sd)
>  }
>  
>  int
> +syscall32_helper_register(struct syscall_helper_data *sd)
> +{
> +
> +	return (syscall32_helper_register_flags(sd, 0));
> +}
> +
> +int
>  syscall32_helper_unregister(struct syscall_helper_data *sd)
>  {
>  	struct syscall_helper_data *sd1;
> diff --git a/sys/compat/freebsd32/freebsd32_util.h b/sys/compat/freebsd32/freebsd32_util.h
> index a5945cf..e473ef6 100644
> --- a/sys/compat/freebsd32/freebsd32_util.h
> +++ b/sys/compat/freebsd32/freebsd32_util.h
> @@ -97,10 +97,14 @@ SYSCALL32_MODULE(syscallname,                           \
>      .syscall_no = FREEBSD32_SYS_##syscallname			\
>  }
>  
> +int    syscall32_register_flags(int *offset, struct sysent *new_sysent,
> +	    struct sysent *old_sysent, int flags);
>  int    syscall32_register(int *offset, struct sysent *new_sysent,
>  	    struct sysent *old_sysent);
>  int    syscall32_deregister(int *offset, struct sysent *old_sysent);
>  int    syscall32_module_handler(struct module *mod, int what, void *arg);
> +int    syscall32_helper_register_flags(struct syscall_helper_data *sd,
> +	    int flags);
>  int    syscall32_helper_register(struct syscall_helper_data *sd);
>  int    syscall32_helper_unregister(struct syscall_helper_data *sd);
>  
> diff --git a/sys/kern/kern_syscalls.c b/sys/kern/kern_syscalls.c
> index 03f6088..30dd203 100644
> --- a/sys/kern/kern_syscalls.c
> +++ b/sys/kern/kern_syscalls.c
> @@ -104,11 +104,14 @@ syscall_thread_exit(struct thread *td, struct sysent *se)
>  }
>  
>  int
> -syscall_register(int *offset, struct sysent *new_sysent,
> -    struct sysent *old_sysent)
> +syscall_register_flags(int *offset, struct sysent *new_sysent,
> +    struct sysent *old_sysent, int flags)
>  {
>  	int i;
>  
> +	if ((flags & ~SY_THR_STATIC) != 0)
> +		return (EINVAL);
> +
>  	if (*offset == NO_SYSCALL) {
>  		for (i = 1; i < SYS_MAXSYSCALL; ++i)
>  			if (sysent[i].sy_call == (sy_call_t *)lkmnosys)
> @@ -127,18 +130,31 @@ syscall_register(int *offset, struct sysent *new_sysent,
>  	*old_sysent = sysent[*offset];
>  	new_sysent->sy_thrcnt = SY_THR_ABSENT;
>  	sysent[*offset] = *new_sysent;
> -	atomic_store_rel_32(&sysent[*offset].sy_thrcnt, 0);
> +	atomic_store_rel_32(&sysent[*offset].sy_thrcnt, flags);
>  	return (0);
>  }
>  
>  int
> +syscall_register(int *offset, struct sysent *new_sysent,
> +    struct sysent *old_sysent)
> +{
> +
> +	return (syscall_register_flags(offset, new_sysent, old_sysent, 0));
> +}
> +
> +int
>  syscall_deregister(int *offset, struct sysent *old_sysent)
>  {
> +	struct sysent *se;
>  
> -	if (*offset) {
> -		syscall_thread_drain(&sysent[*offset]);
> -		sysent[*offset] = *old_sysent;
> -	}
> +	if (*offset == 0)
> +		return (0); /* XXX? */
> +
> +	se = &sysent[*offset];
> +	if ((se->sy_thrcnt & SY_THR_STATIC) != 0)
> +		return (EINVAL);
> +	syscall_thread_drain(se);
> +	sysent[*offset] = *old_sysent;
>  	return (0);
>  }
>  
> @@ -190,14 +206,14 @@ syscall_module_handler(struct module *mod, int what, void *arg)
>  }
>  
>  int
> -syscall_helper_register(struct syscall_helper_data *sd)
> +syscall_helper_register_flags(struct syscall_helper_data *sd, int flags)
>  {
>  	struct syscall_helper_data *sd1;
>  	int error;
>  
>  	for (sd1 = sd; sd1->syscall_no != NO_SYSCALL; sd1++) {
> -		error = syscall_register(&sd1->syscall_no, &sd1->new_sysent,
> -		    &sd1->old_sysent);
> +		error = syscall_register_flags(&sd1->syscall_no,
> +		    &sd1->new_sysent, &sd1->old_sysent, flags);
>  		if (error != 0) {
>  			syscall_helper_unregister(sd);
>  			return (error);
> @@ -208,6 +224,13 @@ syscall_helper_register(struct syscall_helper_data *sd)
>  }
>  
>  int
> +syscall_helper_register(struct syscall_helper_data *sd)
> +{
> +
> +	return (syscall_helper_register_flags(sd, 0));
> +}
> +
> +int
>  syscall_helper_unregister(struct syscall_helper_data *sd)
>  {
>  	struct syscall_helper_data *sd1;
> diff --git a/sys/kern/sysv_msg.c b/sys/kern/sysv_msg.c
> index a572a0e..4fc04bc 100644
> --- a/sys/kern/sysv_msg.c
> +++ b/sys/kern/sysv_msg.c
> @@ -252,11 +252,12 @@ msginit()
>  	}
>  	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
>  
> -	error = syscall_helper_register(msg_syscalls);
> +	error = syscall_helper_register_flags(msg_syscalls, SY_THR_STATIC_KLD);
>  	if (error != 0)
>  		return (error);
>  #ifdef COMPAT_FREEBSD32
> -	error = syscall32_helper_register(msg32_syscalls);
> +	error = syscall32_helper_register_flags(msg32_syscalls,
> +	    SY_THR_STATIC_KLD);
>  	if (error != 0)
>  		return (error);
>  #endif
> diff --git a/sys/kern/sysv_sem.c b/sys/kern/sysv_sem.c
> index c632902..dc1c66a 100644
> --- a/sys/kern/sysv_sem.c
> +++ b/sys/kern/sysv_sem.c
> @@ -278,11 +278,12 @@ seminit(void)
>  	semexit_tag = EVENTHANDLER_REGISTER(process_exit, semexit_myhook, NULL,
>  	    EVENTHANDLER_PRI_ANY);
>  
> -	error = syscall_helper_register(sem_syscalls);
> +	error = syscall_helper_register_flags(sem_syscalls, SY_THR_STATIC_KLD);
>  	if (error != 0)
>  		return (error);
>  #ifdef COMPAT_FREEBSD32
> -	error = syscall32_helper_register(sem32_syscalls);
> +	error = syscall32_helper_register_flags(sem32_syscalls,
> +	    SY_THR_STATIC_KLD);
>  	if (error != 0)
>  		return (error);
>  #endif
> diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c
> index 3480d11..b3132c3 100644
> --- a/sys/kern/sysv_shm.c
> +++ b/sys/kern/sysv_shm.c
> @@ -910,11 +910,12 @@ shminit()
>  	shmexit_hook = &shmexit_myhook;
>  	shmfork_hook = &shmfork_myhook;
>  
> -	error = syscall_helper_register(shm_syscalls);
> +	error = syscall_helper_register_flags(shm_syscalls, SY_THR_STATIC_KLD);
>  	if (error != 0)
>  		return (error);
>  #ifdef COMPAT_FREEBSD32
> -	error = syscall32_helper_register(shm32_syscalls);
> +	error = syscall32_helper_register_flags(shm32_syscalls,
> +	    SY_THR_STATIC_KLD);
>  	if (error != 0)
>  		return (error);
>  #endif
> diff --git a/sys/netinet/sctp_syscalls.c b/sys/netinet/sctp_syscalls.c
> index 3d0f549..8170ca8 100644
> --- a/sys/netinet/sctp_syscalls.c
> +++ b/sys/netinet/sctp_syscalls.c
> @@ -94,11 +94,11 @@ sctp_syscalls_init(void *unused __unused)
>  {
>  	int error;
>  
> -	error = syscall_helper_register(sctp_syscalls);
> +	error = syscall_helper_register_flags(sctp_syscalls, SY_THR_STATIC);
Why sctp does this at all ?  It cannot be loaded.
Seems to be just a mishandling of conditional compilation ?

>  	KASSERT((error == 0),
>  	    ("%s: syscall_helper_register failed for sctp syscalls", __func__));
>  #ifdef COMPAT_FREEBSD32
> -	error = syscall32_helper_register(sctp_syscalls);
> +	error = syscall32_helper_register_flags(sctp_syscalls, SY_THR_STATIC);
>  	KASSERT((error == 0),
>  	    ("%s: syscall32_helper_register failed for sctp syscalls",
>  	    __func__));
> diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h
> index 0f1c256..12bc518 100644
> --- a/sys/sys/sysent.h
> +++ b/sys/sys/sysent.h
> @@ -76,6 +76,12 @@ struct sysent {			/* system call table */
>  #define	SY_THR_ABSENT	0x4
>  #define	SY_THR_INCR	0x8
>  
> +#ifdef KLD_MODULE
> +#define	SY_THR_STATIC_KLD	0
> +#else
> +#define	SY_THR_STATIC_KLD	0x1
> +#endif
This is crude. AFAIU, it should be SY_THR_STATIC instead of 0x1.

> +
>  struct image_params;
>  struct __sigset;
>  struct syscall_args;
> @@ -241,10 +247,13 @@ struct syscall_helper_data {
>      .syscall_no = NO_SYSCALL					\
>  }
>  
> +int	syscall_register_flags(int *offset, struct sysent *new_sysent,
> +	    struct sysent *old_sysent, int flags);
>  int	syscall_register(int *offset, struct sysent *new_sysent,
>  	    struct sysent *old_sysent);
>  int	syscall_deregister(int *offset, struct sysent *old_sysent);
>  int	syscall_module_handler(struct module *mod, int what, void *arg);
> +int	syscall_helper_register_flags(struct syscall_helper_data *sd, int flags);
>  int	syscall_helper_register(struct syscall_helper_data *sd);
>  int	syscall_helper_unregister(struct syscall_helper_data *sd);
I was told many times to not bloat the KPI.  What you do is fine for
merge to stable/10, while for HEAD, add flags argument to syscall_register()
and do not provide wrapper function.

>  
> 
> 
> > > > 
> > > > Also, please see https://reviews.freebsd.org/D1007 which moves
> > > > SY_THR_STATIC check to an inline function, saving us 2 function calls on
> > > > each syscall.
> > > 
> > > Did you benchmarked this ?  I dislike the code bloat.
> > 
> > with syscall_timing from tools/tools ministat says +4% for getuid and +1
> > for pipe+close.
> > 
> > -- 
> > Mateusz Guzik <mjguzik gmail.com>
> 
> -- 
> Mateusz Guzik <mjguzik gmail.com>



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20141025175141.GJ1877>