Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 12 Nov 2015 17:25:37 +0100
From:      Michael Tuexen <tuexen@freebsd.org>
To:        Konstantin Belousov <kostikbel@gmail.com>
Cc:        freebsd-arm <freebsd-arm@freebsd.org>
Subject:   Re: Memory management issue on RPi?
Message-ID:  <BE0B4761-54EC-4632-BA23-A3C373D419AE@freebsd.org>
In-Reply-To: <20151112121825.GJ2257@kib.kiev.ua>
References:  <CB20D8FA-303C-4AA2-B2A6-1FF25DDB8A94@freebsd.org> <20151112121825.GJ2257@kib.kiev.ua>

next in thread | previous in thread | raw e-mail | index | archive | help
> On 12 Nov 2015, at 13:18, Konstantin Belousov <kostikbel@gmail.com> wrote:
>
> On Thu, Nov 12, 2015 at 11:28:07AM +0100, Michael Tuexen wrote:
>> Dear all,
>>
>> I'm experiencing a behaviour I don't expect.
>> When running FreeBSD head on a RPI B+ without swap space
>> it shows the following behaviour on the console:
>>
>> [bsd10:~] tuexen% dd if=/dev/zero of=large_file bs=1m count=1024
>> Nov 12 11:22:16 bsd10 kernel: pid 666 (sshd), uid 1002, was killed: out of swap space
>> Nov 12 11:22:19 bsd10 kernel: pid 606 (thttpd), uid 65534, was killed: out of swap space
>> Nov 12 11:22:24 bsd10 kernel: pid 316 (devd), uid 0, was killed: out of swap space
>> Killed
>> [bsd10:~] tuexen% Nov 12 11:22:27 bsd10 kernel: pid 676 (dd), uid 1002, was killed: out of swap space
>> [bsd10:~] tuexen% uname -a
>> FreeBSD bsd10.fh-muenster.de 11.0-CURRENT FreeBSD 11.0-CURRENT #10 r290676: Wed Nov 11 20:23:53 CET 2015     tuexen@bsd10.fh-muenster.de:/home/tuexen/head/sys/arm/compile/RPI-B  arm
>> [bsd10:~] tuexen% ls -l large_file
>> -rw-r--r--  1 tuexen  tuexen  584056832 Nov 12 11:22 large_file
>>
>> Shouldn't I be able to use dd to generate an almost arbitrary large file (limited
>> by the filesystem, not by the memory)?
>
> This is a known problem with the swap-less OOM.  The following patch
> should give you an immediate relief.  You might want to tweak
> sysctl vm.pageout_oom_seq if default value is not right, it was selected
> by 'try and see' approach on very small (32 or 64MB) i386 VM.
It just works... Will do some more testing...

Best regards
Michael
>
> diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
> index a87f682..1fa61eb 100644
> --- a/sys/vm/vm_page.h
> +++ b/sys/vm/vm_page.h
> @@ -227,6 +227,7 @@ struct vm_domain {
> 	long vmd_segs;	/* bitmask of the segments */
> 	boolean_t vmd_oom;
> 	int vmd_pass;	/* local pagedaemon pass */
> +	int vmd_oom_seq;
> 	int vmd_last_active_scan;
> 	struct vm_page vmd_marker; /* marker for pagedaemon private use =
*/
> 	struct vm_page vmd_inacthead; /* marker for LRU-defeating =
insertions */
> diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
> index f564fb5..b956e25 100644
> --- a/sys/vm/vm_pageout.c
> +++ b/sys/vm/vm_pageout.c
> @@ -122,7 +122,8 @@ static void vm_pageout_init(void);
> static int vm_pageout_clean(vm_page_t m);
> static int vm_pageout_cluster(vm_page_t m);
> static void vm_pageout_scan(struct vm_domain *vmd, int pass);
> -static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass);
> +static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int =
page_shortage,
> +    int starting_page_shortage);
>=20
> SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, =
vm_pageout_init,
>     NULL);
> @@ -158,6 +159,7 @@ SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, =
SI_ORDER_FIRST, kproc_start, &vm_kp);
> int vm_pages_needed;		/* Event on which pageout daemon sleeps =
*/
> int vm_pageout_deficit;		/* Estimated number of pages =
deficit */
> int vm_pageout_wakeup_thresh;
> +static int vm_pageout_oom_seq =3D 24;
>=20
> #if !defined(NO_SWAPPING)
> static int vm_pageout_req_swapout;	/* XXX */
> @@ -223,6 +225,10 @@ static int pageout_lock_miss;
> SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss,
> 	CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during =
pageout");
>=20
> +SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq,
> +	CTLFLAG_RW, &vm_pageout_oom_seq, 0,
> +	"back-to-back calls to oom detector to start OOM");
> +
> #define VM_PAGEOUT_PAGE_COUNT 16
> int vm_pageout_page_count =3D VM_PAGEOUT_PAGE_COUNT;
>=20
> @@ -1041,7 +1047,8 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
> 	vm_object_t object;
> 	long min_scan;
> 	int act_delta, addl_page_shortage, deficit, error, maxlaunder, =
maxscan;
> -	int page_shortage, scan_tick, scanned, vnodes_skipped;
> +	int page_shortage, scan_tick, scanned, starting_page_shortage;
> +	int vnodes_skipped;
> 	boolean_t pageout_ok, queues_locked;
>=20
> 	/*
> @@ -1080,6 +1087,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
> 		page_shortage =3D vm_paging_target() + deficit;
> 	} else
> 		page_shortage =3D deficit =3D 0;
> +	starting_page_shortage =3D page_shortage;
>=20
> 	/*
> 	 * maxlaunder limits the number of dirty pages we flush per =
scan.
> @@ -1343,6 +1351,12 @@ relock_queues:
> 		(void)speedup_syncer();
>=20
> 	/*
> +	 * If the inactive queue scan fails repeatedly to meet its
> +	 * target, kill the largest process.
> +	 */
> +	vm_pageout_mightbe_oom(vmd, page_shortage, =
starting_page_shortage);
> +
> +	/*
> 	 * Compute the number of pages we want to try to move from the
> 	 * active queue to the inactive queue.
> 	 */
> @@ -1453,15 +1467,6 @@ relock_queues:
> 		}
> 	}
> #endif
> -
> -	/*
> -	 * If we are critically low on one of RAM or swap and low on
> -	 * the other, kill the largest process.  However, we avoid
> -	 * doing this on the first pass in order to give ourselves a
> -	 * chance to flush out dirty vnode-backed pages and to allow
> -	 * active pages to be moved to the inactive queue and reclaimed.
> -	 */
> -	vm_pageout_mightbe_oom(vmd, pass);
> }
>=20
> static int vm_pageout_oom_vote;
> @@ -1472,12 +1477,17 @@ static int vm_pageout_oom_vote;
>  * failed to reach free target is premature.
>  */
> static void
> -vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass)
> +vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
> +    int starting_page_shortage)
> {
> 	int old_vote;
>=20
> -	if (pass <=3D 1 || !((swap_pager_avail < 64 && =
vm_page_count_min()) ||
> -	    (swap_pager_full && vm_paging_target() > 0))) {
> +	if (starting_page_shortage <=3D 0 || starting_page_shortage !=3D
> +	    page_shortage)
> +		vmd->vmd_oom_seq =3D 0;
> +	else
> +		vmd->vmd_oom_seq++;
> +	if (vmd->vmd_oom_seq < vm_pageout_oom_seq) {
> 		if (vmd->vmd_oom) {
> 			vmd->vmd_oom =3D FALSE;
> 			atomic_subtract_int(&vm_pageout_oom_vote, 1);
> @@ -1485,6 +1495,12 @@ vm_pageout_mightbe_oom(struct vm_domain *vmd, =
int pass)
> 		return;
> 	}
>=20
> +	/*
> +	 * Do not follow the call sequence until OOM condition is
> +	 * cleared.
> +	 */
> +	vmd->vmd_oom_seq =3D 0;
> +
> 	if (vmd->vmd_oom)
> 		return;
>=20
> @@ -1510,6 +1526,37 @@ vm_pageout_mightbe_oom(struct vm_domain *vmd, =
int pass)
> 	atomic_subtract_int(&vm_pageout_oom_vote, 1);
> }
>=20
> +static long
> +vm_pageout_resident_count(struct vmspace *vmspace)
> +{
> +	vm_map_t map;
> +	vm_map_entry_t entry;
> +	vm_object_t obj;
> +	long res;
> +
> +	map =3D &vmspace->vm_map;
> +	KASSERT(!map->system_map, ("system map"));
> +	sx_assert(&map->lock, SA_LOCKED);
> +	res =3D 0;
> +	for (entry =3D map->header.next; entry !=3D &map->header;
> +	    entry =3D entry->next) {
> +		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) !=3D 0)
> +			continue;
> +		obj =3D entry->object.vm_object;
> +		if (obj =3D=3D NULL)
> +			continue;
> +		switch (obj->type) {
> +		case OBJT_DEFAULT:
> +		case OBJT_SWAP:
> +		case OBJT_VNODE:
> +		case OBJT_PHYS:
> +			res +=3D obj->resident_page_count;
> +			break;
> +		}
> +	}
> +	return (res);
> +}
> +
> void
> vm_pageout_oom(int shortage)
> {
> @@ -1554,7 +1601,8 @@ vm_pageout_oom(int shortage)
> 			if (!TD_ON_RUNQ(td) &&
> 			    !TD_IS_RUNNING(td) &&
> 			    !TD_IS_SLEEPING(td) &&
> -			    !TD_IS_SUSPENDED(td)) {
> +			    !TD_IS_SUSPENDED(td) &&
> +			    !TD_IS_SWAPPED(td)) {
> 				thread_unlock(td);
> 				breakout =3D 1;
> 				break;
> @@ -1582,12 +1630,13 @@ vm_pageout_oom(int shortage)
> 		}
> 		PROC_UNLOCK(p);
> 		size =3D vmspace_swap_count(vm);
> -		vm_map_unlock_read(&vm->vm_map);
> 		if (shortage =3D=3D VM_OOM_MEM)
> -			size +=3D vmspace_resident_count(vm);
> +			size +=3D vm_pageout_resident_count(vm);
> +		vm_map_unlock_read(&vm->vm_map);
> 		vmspace_free(vm);
> +
> 		/*
> -		 * if the this process is bigger than the biggest one
> +		 * If this process is bigger than the biggest one,
> 		 * remember it.
> 		 */
> 		if (size > bigsize) {
>=20
>=20
>=20




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?BE0B4761-54EC-4632-BA23-A3C373D419AE>