Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 02 Sep 2023 06:24:16 +0000
From:      "Mathew\, Cherry G.*" <c@bow.st>
To:        freebsd-hackers@freebsd.org
Subject:   ARC model specified in spinroot/promela
Message-ID:  <85jzt96qjz.fsf@bow.st>

next in thread | raw e-mail | index | archive | help
--=-=-=
Content-Type: text/plain

Hello hackers,

I'm writing to introduce a project I've been working on off-and-on for a
while now - verifying parts of kernel code using a formal specifier[1]

Please find attached a patch to try out a formal verification run of the
Adaptive Replacement Cache by Megido et.al. [2]

You can try it out by installing spin from your favourite package
manager, and then running "make" in the current directory - it just
needs the usual C toolchain, afaik.

I'm hoping that someone can help me complete the current run, as I don't
have the computing resources required to run the full model (about 16GB
RAM). Meanwhile I'll keep finding ways to reduce the statespace
required.

The idea here is that once verified, the model can be written up as C
code, and then re-exracted using the modex [3] tool

I have some unpublished work that demonstrates this using the md(4)
NetBSD driver, but I want to be able to do a clean-room implementation
first, in order to get a better sense of a developer methodology I'm
going to call "DDD" or "D3" (Design Driven Development).

Bricks and boquets welcome. (Please Cc: me, as I am not subscribed to
the ML)

[1] https://spinroot.com/spin/whatispin.html
[2] https://www.usenix.org/legacy/events/fast03/tech/full_papers/megiddo/megiddo.pdf Fig. 4
[3] http://spinroot.com/modex/MANUAL.html

Best,
-- 
~cherry


--=-=-=
Content-Type: text/x-patch
Content-Disposition: inline; filename=arc.diff

diff -urN arc.null/arc.drv arc/arc.drv
--- arc.null/arc.drv	1970-01-01 00:00:00.000000000 +0000
+++ arc/arc.drv	2023-09-02 05:31:42.000000000 +0000
@@ -0,0 +1,97 @@
+/* $NetBSD$ */
+
+/* Spin model driver for NetBSD arc(9) arc.c written by cherry */
+
+/* XXX: Move these into a set of library includes ? */
+/* XXX: Equivalence verification */
+/* Note: CAS implemented in an atomic {} block */
+#define mutex_enter(_mutex)			\
+	atomic {				\
+		(_mutex == 0) -> _mutex = 1;	\
+	}
+
+#define mutex_exit(_mutex)			\
+	atomic {				\
+		assert(_mutex == 1);		\
+		(_mutex == 1) -> _mutex = 0;	\
+	}
+
+#define N_ITEMS 6 /* Number of cache items to test with */
+#define IID_INVAL N_ITEMS /* Valid item.iid must be < N_ITEMS */
+#define C 5 /* Cache size - use judiciously - adds to statespace */
+#define ITEM_REPS 6 /* Max repeat item requests */
+
+typedef arc_item {
+	byte iid;    /* Unique identifier for item */
+	bool cached;
+};
+
+/* Note that we use the arc_item.iid as the member lookup handle to reduce state space */
+typedef arc_list {
+	chan item_list  = [C + 1] of { byte }; /* A list of page items */
+};
+
+#define lengthof(_arc_list) len(_arc_list.item_list)
+#define memberof(_arc_list, _arc_item) _arc_list.item_list??[eval(_arc_item.iid)]
+#define addMRU(_arc_list, _arc_item) _arc_list.item_list!_arc_item.iid
+#define readLRU(_arc_list, _arc_item) _arc_list.item_list?<_arc_item.iid>
+#define delLRU(_arc_list) _arc_list.item_list?_
+#define delitem(_arc_list, _arc_item) _arc_list.item_list??eval(_arc_item.iid)
+#define refreshitemto(_src_arc_list, _dest_arc_list, _arc_item)	\
+	d_step {		   		   		\
+	       delitem(_src_arc_list, _arc_item);		\
+	       addMRU(_dst_arc_list, _arc_item);		\
+	}
+#define refreshitem(_arc_list, _arc_item) refreshitemto(_arc_list, _arc_list, _arc_item)
+
+#define cachefetch(_arc_item) _arc_item.cached = true
+#define cacheremove(_arc_item) _arc_item.cached = false
+
+#define min(a, b) ((a < b) -> a : b)
+#define max(a, b) ((a > b) -> a : b)
+
+/* Declare arc lists */
+arc_list B1, B2, T1, T2;
+
+#define init_arc_item(_arc_item, _iid, _cached)		\
+	d_step {		       			\
+		_arc_item.iid = _iid;	       		\
+		_arc_item.cached = _cached;		\
+	}
+	
+hidden arc_item _x[N_ITEMS]; /* Input state is irrelevant from a verification PoV */
+hidden byte _x_iid = 0;
+hidden byte _item_rep = 0;
+
+/* Temp variable to hold LRU item */
+hidden arc_item LRUitem;
+
+/* Adaptation "delta" variables */
+hidden byte d1, d2;
+byte p = 0;
+
+bit sc_lock;
+
+/* Drive the procs */
+init {
+
+	atomic {
+                _x_iid = 0;
+		do
+                :: _x_iid < N_ITEMS ->
+		   	   init_arc_item(_x[_x_iid], _x_iid, false);
+			   _item_rep = 0;
+			   do
+			   :: _item_rep < (_x_iid % ITEM_REPS) ->
+			      		run p_arc(_x[_x_iid]);
+					_item_rep++;
+			   :: _item_rep >= (_x_iid % ITEM_REPS) ->
+			      		break;
+			   od
+			   _x_iid++;
+		:: _x_iid >= N_ITEMS ->
+			break
+		od
+	}
+
+}
diff -urN arc.null/arc.inv arc/arc.inv
--- arc.null/arc.inv	1970-01-01 00:00:00.000000000 +0000
+++ arc/arc.inv	2023-09-02 05:31:42.000000000 +0000
@@ -0,0 +1,19 @@
+/* $NetBSD$ */
+
+/* These are Linear Temporal Logic invariants (and constraints)
+ * applied over the statespace created by the promela
+ * specification. Correctness is implied by Logical consistency.
+ */
+ltl 
+{
+	/* Liveness - all thread finally end */
+	eventually always (_nr_pr == 1) &&
+	/* Might look obvious, but make this explicit */
+	always ((lengthof(T1) <= C) &&
+	        (lengthof(B1) <= C) &&
+	        (lengthof(T2) <= C) &&
+	        (lengthof(B2) <= C)   ) &&
+	/* Not strictly true, but this forces a good driver algo */
+	eventually (p > 0)
+		
+}
diff -urN arc.null/arc.pml arc/arc.pml
--- arc.null/arc.pml	1970-01-01 00:00:00.000000000 +0000
+++ arc/arc.pml	2023-09-02 05:31:42.000000000 +0000
@@ -0,0 +1,194 @@
+/* $NetBSD$ */
+
+/* Spin process model for NetBSD arc(9) arc.c written by cherry */
+
+/*
+ * We implement the following algorithm from page 10, Figure 4.
+ * https://www.usenix.org/legacy/events/fast03/tech/full_papers/megiddo/megiddo.pdf
+ *
+ *
+ *  ARC(c)
+ *  
+ *  INPUT: The request stream x1,x2,....,xt,....
+ *  INITIALIZATION: Set p = 0 and set the LRU lists T1, B1, T2, and B2 to empty.
+ *  
+ *  For every t>=1 and any xt, one and only one of the following four cases must occur.
+ *  Case I: xt is in T1 or T2. A cache hit has occurred in ARC(c) and DBL(2c).
+ *       Move xt to MRU position in T2.
+ *  
+ *  Case II: xt is in B1. A cache miss (resp. hit) has occurred in ARC(c) (resp. DBL(2c)).
+ *       	 ADAPTATION: Update p = min { p + d1,c }
+ *  	 	     where d1 = { 1 if |B1| >= |B2|, |B2|/|B1| otherwise
+ *  
+ *       REPLACE(xt, p). Move xt from B1 to the MRU position in T2 (also fetch xt to the cache).
+ *  
+ *  Case III: xt is in B2. A cache miss (resp. hit) has occurred in ARC(c) (resp. DBL(2c)).
+ *       	 ADAPTATION: Update p = max { p - d2,0 }
+ *  	 	     where d2 = { 1 if |B2| >= |B1|, |B1|/|B2| otherwise
+ *  
+ *       REPLACE(xt, p). Move xt from B2 to the MRU position in T2 (also fetch xt to the cache).
+ *       
+ *  Case IV: xt is not in T1 U B1 U T2 U B2. A cache miss has occurred in ARC(c) and DBL(2c).
+ *       Case A: L1 = T1 U B1 has exactly c pages.
+ *       	  If (|T1| < c)
+ *  	     	     	Delete LRU page in B1. REPLACE(xt,p).
+ *  	  	  else
+ *			Here B1 is empty. Delete LRU page in T1 (also remove it from the cache).
+ *  	  	  endif
+ *       Case B: L1 = T1 U B1 has less than c pages.
+ *       	  If (|T1| + |T2| + |B1| + |B2| >= c)
+ *  	             Delete LRU page in B2, if (|T1| + |T2| + |B1| + |B2| = 2c).
+ *  		     REPLACE(xt, p).
+ *  	  	  endif
+ *  
+ *       Finally, fetch xt to the cache and move it to MRU position in T1.
+ *  
+ *  Subroutine REPLACE(xt,p)
+ *       If ( (|T1| is not empty) and ((|T1| exceeds the target p) or (xt is in B2 and |T1| = p)) )
+ *       	  Delete the LRU page in T1 (also remove it from the cache), and move it to MRU position in B1.
+ *       else
+ *		  Delete the LRU page in T2 (also remove it from the cache), and move it to MRU position in B2.
+ *       endif
+ */
+ 
+inline REPLACE(/* arc_item */ x_t, /* byte */ p)
+{
+	/*
+	 * Since LRUitem is declared in scope p_ARC, we expect it to be only accessible from there and REPLACE()
+	 * as REPLACE() is only expected to be called from p_ARC.
+	 * XXX: May need to revisit due to Modex related limitations.
+	 */
+	init_arc_item(LRUitem, IID_INVAL, false);
+	
+	if
+		::
+		(lengthof(T1) != 0) &&
+		((lengthof(T1) > p) || (memberof(B2, x_t) && (lengthof(T1) == p)))
+		->
+		d_step {
+		       readLRU(T1, LRUitem);
+		       delLRU(T1);
+		       cacheremove(LRUitem);
+		       addMRU(B1, LRUitem);
+		}
+
+		::
+		else
+		->
+		d_step {
+		       readLRU(T2, LRUitem);
+		       delLRU(T2);
+		       cacheremove(LRUitem);
+		       addMRU(B2, LRUitem);
+		}
+	fi
+}
+
+proctype p_arc(arc_item x_t)
+{
+	/* Serialise entry */	
+
+	mutex_enter(sc_lock);
+
+	if
+		:: /* Case I */
+		memberof(T1, x_t)
+		->
+		d_step {
+		       delitem(T1, x_t);
+		       addMRU(T2, x_t);
+		}
+		:: /* Case I */
+		memberof(T2, x_t)
+		->
+		d_step {
+		       delitem(T2, x_t);
+		       addMRU(T2, x_t);
+		}
+		:: /* Case II */
+		memberof(B1, x_t)
+		->
+		d1 = ((lengthof(B1) >= lengthof(B2)) -> 1 : (lengthof(B2)/lengthof(B1)));
+		p = min((p + d1), C);
+
+		REPLACE(x_t, p);
+		d_step {
+		       delitem(B1, x_t);
+		       addMRU(T2, x_t);
+		       cachefetch(x_t);
+		}
+		:: /* Case III */
+		memberof(B2, x_t)
+		->
+		d2 = ((lengthof(B2) >= lengthof(B1)) -> 1 : (lengthof(B1)/lengthof(B2)));
+		p = max(p - d2, 0);
+		
+		REPLACE(x_t, p);
+		d_step {
+		       delitem(B2, x_t);
+		       addMRU(T2, x_t);
+		       cachefetch(x_t);
+		}
+		:: /* Case IV */
+		!(memberof(T1, x_t) ||
+		  memberof(B1, x_t) ||
+		  memberof(T2, x_t) ||
+		  memberof(B2, x_t))
+		->
+		if
+			:: /* Case A */
+			((lengthof(T1) + lengthof(B1)) == C)
+			->
+			if
+				::
+				(lengthof(T1) < C)
+				->
+				delLRU(B1);
+				REPLACE(x_t, p);
+				::
+				else
+				->
+				assert(lengthof(B1) == 0);
+				d_step {
+				       readLRU(T1, LRUitem);
+				       delLRU(T1);
+				       cacheremove(LRUitem);
+				}
+			fi
+			:: /* Case B */
+			((lengthof(T1) + lengthof(B1)) < C)
+			->
+			if
+				::
+				((lengthof(T1) +
+				  lengthof(T2) +
+				  lengthof(B1) +
+				  lengthof(B2)) >= C)
+				->
+				if
+					::
+					((lengthof(T1) +
+				  	  lengthof(T2) +
+				  	  lengthof(B1) +
+				  	  lengthof(B2)) == (2 * C))
+					->
+					delLRU(B2);
+					REPLACE(x_t, p);
+					::
+					else
+					->
+					skip;
+				fi
+				::
+				else
+				->
+				skip;
+			fi
+		fi
+		cachefetch(x_t);
+		addMRU(T1, x_t);
+	fi
+
+	mutex_exit(sc_lock);
+}
+
diff -urN arc.null/Makefile arc/Makefile
--- arc.null/Makefile	1970-01-01 00:00:00.000000000 +0000
+++ arc/Makefile	2023-09-02 05:31:42.000000000 +0000
@@ -0,0 +1,89 @@
+# This set of spinroot related files were written by cherry
+# <c@bow.st> in the Gregorian Calendar year AD.2023, in the month
+# of February that year.
+#
+# We have two specification files and a properties file
+#
+# The properties file contains "constraint" sections
+# such as ltl or never claims (either or, not both).
+# The specification is divided into two files:
+# the file with suffix '.drv' is a "driver" which
+# instantiates processes that will ultimately "drive" the
+# models under test.
+# The file with the suffix '.pml' contains the process
+# model code, which, is intended to be the formal specification
+# for the code we are interested in writing in C.
+#
+# We process these files in slightly different ways during
+# the dev cycle, but broadly speaking, the idea is to create
+# a file called 'spinmodel.pml' which contains the final
+# model file that is fed to spin.
+#
+# Note that when we use the model extractor tool "modex" to
+# extract the 'specification' from C code written to implement
+# the model defined above. We use a 'harness' file (see file with
+# suffix '.prx' below.
+#
+# Once the harness has been run, spinmodel.pml should be
+# synthesised and processed as usual.
+# 
+# The broad idea is that software dev starts by writing the spec
+# first, validating the model, and then implementing the model in
+# C, after which we come back to extract the model from the C file
+# and cross check our implementation using spin.
+#
+# If things go well, the constraints specified in the '.ltl' file
+# should hold exactly for both the handwritten model, and the
+# extracted one.
+
+spin-gen: arc.pml arc.drv arc.inv
+	cp arc.pml model #mimic modex
+	cat arc.drv > spinmodel.pml;cat model >> spinmodel.pml;cat arc.inv >> spinmodel.pml;
+	spin -a spinmodel.pml
+
+spin-build: pan.*
+	cc -o pan pan.c
+
+spin-run: spin-build #XXX depend on pan
+	./pan -a #Generate arc.pml.trail	on error
+
+# You run the trace only if the spin run above failed and created a trail
+spin-trace: spinmodel.pml.trail
+	spin -t spinmodel.pml -p -g #  -p (statements) -g (globals) -l (locals) -s (send) -r (recv)
+	./pan -r spinmodel.pml.trail -g
+
+# Modex Extracts from arc.c to 'model' - see arc.prx
+# Unfortunately there doesn't seem to be a way to specify a filename
+# to generate
+SRC = $HOME/work/NetBSD-src
+
+modex-gen: arc.prx arc.c
+	#modex -v -w -D_KERNEL -I$obj/home/antix/work/NetBSD-src/destdir.amd64/usr/include -I$$HOME/work/NetBSD-src/obj/home/antix/work/NetBSD-src/sys/arch/amd64/compile/GENERIC/ -I$$HOME/work/NetBSD-src/sys arc.prx
+	#cat model > spinmodel.pml
+	touch ioconf.h # Pretend we have an Kern conf
+	modex -v -w arc.prx
+	cat arc.drv > spinmodel.pml;cat model >> spinmodel.pml;cat arc.inv >> spinmodel.pml;
+	spin -a spinmodel.pml #Sanity check
+
+modex-gen-clean:
+	rm -f ioconf.h # Temp - see above.
+	rm -f spinmodel.pml # Our consolidated model file
+	rm -f _spin_nvr.tmp # Never claim file
+	rm -f model # modex generated intermediate "model" file
+	rm -f pan.* # Spin generated source files
+	rm -f _modex* # modex generated script files
+	rm -f  *.I *.M
+spin-gen-clean:
+	rm -f spinmodel.pml # Our consolidated model file
+	rm -f _spin_nvr.tmp # Never claim file
+	rm -f model # Intermediate "model" file
+	rm -f pan.* # Spin generated source files
+
+spin-build-clean:
+	rm -f pan
+
+spin-run-clean:
+	rm -f spinmodel.pml.trail
+
+clean: modex-gen-clean spin-gen-clean spin-build-clean spin-run-clean
+#	rm -f *~

--=-=-=--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?85jzt96qjz.fsf>