home *** CD-ROM | disk | FTP | other *** search
- Newsgroups: comp.theory.cell-automata
- Path: sparky!uunet!zaphod.mps.ohio-state.edu!cis.ohio-state.edu!news.sei.cmu.edu!fs7.ece.cmu.edu!crabapple.srv.cs.cmu.edu!hopkins
- From: hopkins@cs.cmu.edu (Don Hopkins)
- Subject: fast cellular automata implementation
- In-Reply-To: eeklund@nyx.cs.du.edu's message of Wed, 18 Nov 92 19:17:44 GMT
- Message-ID: <By30pu.E9s.1@cs.cmu.edu>
- Originator: hopkins@ECP.GARNET.CS.CMU.EDU
- Sender: news@cs.cmu.edu (Usenet News System)
- Nntp-Posting-Host: ecp.garnet.cs.cmu.edu
- Organization: School of Computer Science, Carnegie Mellon University
- References: <1992Nov17.122445.14209@odin.diku.dk>
- <1992Nov18.191744.16470@mnemosyne.cs.du.edu>
- Date: Sat, 21 Nov 1992 19:56:17 GMT
- Lines: 524
-
- eeklund@nyx.cs.du.edu (Eivind Eklund) writes:
-
- [...] And this is also nice for doing FAST implementations - see if
- you can find out how... (This might be common knowledge allready,
- but I have a hack which I believe to be VERY fast - somebody tell
- me how many cells/second they are able to do on what kinds of
- computers 8-) I am able to do approx 1.3 million cells/second on a
- 1 MIPS Amiga. (7.14 Mhz 68000)
-
- It's important to reduce the number of times you access memory, and
- keep as much as you can in registers. You could implement a pure
- counting rule pretty efficiently by moving a 3x3 window over the
- cells, and keeping a count of the 1's under it, incrementing your
- count by 0..3 depending on the leading edge and decrementing it by
- that much three steps later (when those cells fall off the trailing
- edge).
-
- Another way to make it go faster (that would work well with the above
- technique) is to eliminate the edge conditions in the loop, by making
- the temporary array 2x2 cells bigger than the destination array.
- Before applying the rule, copy the cells into the temporary array
- inset by 1,1 (so they're centered with a 1 cell perimeter all
- around), then set the cells around the perimeter to the corresponding
- wrapping edge (to form a topological torus), and loop over the inset
- cells to compute the output array. Since the loop's inset by one, you
- can address all neighbors uniformly like "west = src[-1], south =
- src[srcline]", instead of "west = src[x==0 ? srcline-1 : -1]", etc.
-
- I used this technique in my cellular automata machine implementation,
- that runs on HyperLook under OpenWindows 3.0. The SPARC binary (with
- the user interface that lets you draw with the mouse in the cells
- while it's running) is available via anonymous ftp from ftp.uu.net, in
- the "graphics/NeWS" directory (be sure to get the HyperLook runtime
- too). On a Sparc 2 it's quite fluid, and makes a pretty effective
- psychodellic doodle pad. (It also includes a free lava lamp.)
-
- Here's the source code for the guts of my CAM simulator.
- I've removed the cryptic user interface stuff and distilled it down,
- so the code can be easily reused. I generate the rule lookup tables
- using SPARC Forth, in a language mostly compatible with the one used
- in Margolis & Toffoli's Cellular Automata Machines. The simulator has
- a few built-in neighborhoods that index into the lookup table, and
- also several built-in rules implemented directory. You have to
- initialize the cells, rule, and lookup table yourself (although the
- build-in rules don't use the lookup table), and roll your own display
- and user interface (unless you just enjoy the intellectual
- satisfaction of knowing it's applying all those rules).
-
- Some day when I have the time I will port this to X11, probably using
- the Tk toolkit and TCL as an extension language, so you can define
- rules in TCL instead of Forth. (It doesn't matter how efficient the
- high level rule language is because it's just used to generate a
- lookup table which is used in the inner loop).
-
- My apologies for the gnarly cpp macros, but C is such a cheezy
- language, with a brain dead macro facility tacked on as an
- afterthought, that once you get past anything more complicated than
- "#define MAX_BUFFER_SIZE 32", you end up writing in two languages at
- once, interleaving them with backslashes, and omitting semicolons in
- strategic places. Note that "#define foo (bar)" is fundamentally
- different than "#define foo(bar)". The idea behind the design of the
- macros is that it should be possible to re-use and combine rule
- definitions in useful ways (like n_margolis_ph or n_eco).
-
- -Don
-
- /*
- * Cellular Automata Machine Simulator.
- * Copyright (C) 1992 by Don Hopkins. All rights reserved.
- * This program is provided for unrestricted use, provided that this
- * copyright message is preserved. There is no warranty, and no author
- * or distributer accepts responsibility for any damage caused by this
- * program.
- */
-
-
- #include <sys/types.h>
-
- u_char *ruletable;
- int ruletablesize;
- u_char *src, *dst;
- u_char srcline, dstline;
- int width, height;
- u_char phase = 0;
- u_char wrap = 3;
- int steps = 1;
- int flow;
- void (*rule)();
-
-
- /* Generate a fast Cellular Automata Machine inner loop body */
- #define CAM_LOOP_BODY(BODY) \
- { int y; \
- for (y=0; y<height; y++) { \
- long l0 = (src[0]<<8) + src[1], \
- l1 = (src[srcline]<<8) + src[srcline+1], \
- l2 = (src[srcline+srcline]<<8) + src[srcline+srcline+1]; \
- int x; \
- for (x=0; x<width; x++) { \
- l0 = (l0<<8) + src[2]; \
- l1 = (l1<<8) + src[srcline+2]; \
- l2 = (l2<<8) + src[srcline+srcline+2]; \
- BODY; \
- src++; dst++; \
- } \
- src += srcline - width; dst += dstline - width; \
- } \
- }
-
- /* Apply the rule and output the result */
- #define CAM_LOOP(RULE) \
- CAM_LOOP_BODY(*dst = (RULE))
-
- /* Rule defined by lookup table indexed by some neighborhood */
- #define CAM_TABLE_LOOP(NEIGHBORHOOD) \
- CAM_LOOP(ruletable[(NEIGHBORHOOD)])
-
- /* Extract neighboring cells from registers */
- #define NORTHWEST ((u_char)((l0>>16) & 0xff))
- #define NORTH ((u_char)((l0>>8) & 0xff))
- #define NORTHEAST ((u_char)(l0 & 0xff))
- #define WEST ((u_char)((l1>>16) & 0xff))
- #define CENTER ((u_char)((l1>>8) & 0xff))
- #define EAST ((u_char)(l1 & 0xff))
- #define SOUTHWEST ((u_char)((l2>>16) & 0xff))
- #define SOUTH ((u_char)((l2>>8) & 0xff))
- #define SOUTHEAST ((u_char)(l2 & 0xff))
-
- /* Eight neighbor sum of plane p (0..7) */
- #define SUM8p(p) (((l0>>p)&1) + ((l0>>(p+8))&1) + ((l0>>(p+16))&1) + \
- ((l1>>p)&1) + ((l1>>(p+16))&1) + \
- ((l2>>p)&1) + ((l2>>(p+8))&1) + ((l2>>(p+16))&1))
-
- /* Nine neighbor sum of plane p (0..7) */
- #define SUM9p(p) (SUM8p(p) + ((l1>>(p+8))&1))
-
- /* Eight and nine neighbor sum of plane 0 */
- #define SUM8 SUM8p(0)
- #define SUM9 SUM9p(0)
-
-
- init_cam(int w, int h)
- {
- void n_dheat();
-
- width = w; height = h;
-
- /* allocate output matrix of size width x height */
- dstline = width;
- dst = (u_char *)malloc(dstline * height);
-
- /* allocate temporary matrix of size width+2 x height+2 */
- srcline = width + 2;
- src = (u_char *)malloc(srcline * (height + 2) + (width + 2));
-
- rule = n_dheat;
- }
-
-
- do_rule()
- {
- int step;
-
- for (step=0; step < steps; step++) {
- int x, y;
- u_char *s = src + srcline + 1, /* offset by 1,1 */
- *d = dst;
-
- /*
- * Before applying the rule, we copy from the cells back from the dst
- * matrix to the temporary src matrix. The temporary matrix is 2x2 larger
- * than the actual matrix we're computing, which is copied into the center,
- * and the wrapping edges copied around the perimeter, to eliminate edge
- * condition tests from the inner loop.
- *
- * ff f0 f1 ... fe ff f0
- *
- * 0f 00 01 ... 0e 0f 00
- * 1f 10 11 ... 1e 1f 10
- * .. .. .. .. .. ..
- * ef e0 e1 ... ee ef e0
- * ff f0 f1 ... fe ff f0
- *
- * 0f 00 01 ... 0e 0f 00
- *
- * wrap value: effect:
- * 0 no effect
- * 1 copy cells, no wrap
- * 2 no copy, wrap edges
- * 3 copy cells, wrap edges
- * 4 copy cells, same edges
- */
-
- switch (wrap) {
- case 0:
- break;
- case 1:
- for (y=0; y<height; y++) {
- bcopy(d, s, width);
- s += srcline;
- d += dstline;
- }
- break;
- case 2:
- for (y=0; y<height; y++) {
- s[-1] = s[width-1];
- s[width] = s[0];
- s += srcline;
- d += dstline;
- }
- bcopy(src + srcline*height, src, srcline);
- bcopy(src + srcline, src + srcline*(height+1), srcline);
- break;
- case 3:
- for (y=0; y<height; y++) {
- bcopy(d, s, width);
- s[-1] = s[width-1];
- s[width] = s[0];
- s += srcline;
- d += dstline;
- }
- bcopy(src + srcline*height, src, srcline);
- bcopy(src + srcline, src + srcline*(height+1), srcline);
- break;
- case 4:
- for (y=0; y<height; y++) {
- bcopy(d, s, width);
- s[-1] = s[0];
- s[width] = s[width-1];
- s += srcline;
- d += dstline;
- }
- bcopy(src + srcline*height, src + (srcline * (height + 1)), srcline);
- bcopy(src + srcline, src, srcline);
- break;
- }
-
- (*rule)();
-
- phase = !phase;
-
- } /* for step */
- }
-
-
- void
- n_moore_a()
- {
- /* 0 1 2 3 4 5 6 7 8 9 */
- /* c c' se sw ne nw e w s n */
- /* 0x1 0x2 0x4 0x8 0x10 0x20 0x40 0x80 0x100 0x200 */
-
- #define MOORE_A ( \
- ((NORTHWEST&1)<<5) | ((NORTH&1)<<9) |((NORTHEAST&1)<<4) | \
- ((WEST&1)<<7) | (CENTER&3) | ((EAST&1)<<6) | \
- ((SOUTHWEST&1)<<3) | ((SOUTH&1)<<8) |((SOUTHEAST&1)<<2) \
- )
-
- CAM_TABLE_LOOP(MOORE_A)
- }
-
- void
- n_moore_ab()
- {
- /* 0 1 2 3 4 5 6 7 8 9 10 11 */
- /* c c' se sw ne nw e w s n &c &c' */
- /* 0x1 0x2 0x4 0x8 0x10 0x20 0x40 0x80 0x100 0x200 0x400 0x800 */
-
- #define MOORE_AB (MOORE_A | ((CENTER&12)<<8))
-
- CAM_TABLE_LOOP(MOORE_AB)
- }
-
- void
- n_vonn_neumann()
- {
- /* 0 1 2 3 4 5 6 7 8 9 */
- /* c c' e' w' s' n' e w s n */
- /* 0x1 0x2 0x4 0x8 0x10 0x20 0x40 0x80 0x100 0x200 */
-
- #define VON_NEUMANN ( \
- (CENTER&3) | \
- ((EAST&1)<<6) | ((EAST&2)<<1) | \
- ((WEST&1)<<7) | ((WEST&2)<<2) | \
- ((SOUTH&1)<<8) | ((SOUTH&2)<<3) | \
- ((NORTH&1)<<9) | ((NORTH&2)<<4) \
- )
-
- CAM_TABLE_LOOP(VON_NEUMANN)
- }
-
- void
- n_margolis()
- {
- register u_char i;
-
- /* 0 1 2 3 4 5 6 7 8 9 */
- /* c c' cw ccw opp cw' ccw' opp' */
- /* 0x1 0x2 0x4 0x8 0x10 0x20 0x40 0x80 0x100 0x200 */
-
- #define MARGOLIS_ODD ( \
- (CENTER & 3) | \
- (i=(x&1 ? (y&1 ? (EAST) : (NORTH)) \
- : (y&1 ? (SOUTH) : (WEST))), \
- (((i&1)<<2) | ((i&2)<<4))) | \
- (i=(x&1 ? (y&1 ? (SOUTH) : (EAST)) \
- : (y&1 ? (WEST) : (NORTH))), \
- (((i&1)<<3) | ((i&2)<<5))) | \
- (i=(x&1 ? (y&1 ? (SOUTHEAST):(NORTHEAST)) \
- : (y&1 ? (SOUTHWEST):(NORTHWEST))), \
- (((i&1)<<4) | ((i&2)<<6))) \
- )
-
- #define MARGOLIS_EVEN ( \
- (CENTER & 3) | \
- (i=(x&1 ? (y&1 ? (WEST) : (SOUTH)) \
- : (y&1 ? (NORTH) : (EAST))), \
- (((i&1)<<2) | ((i&2)<<4))) | \
- (i=(x&1 ? (y&1 ? (NORTH) : (WEST)) \
- : (y&1 ? (EAST) : (SOUTH))), \
- (((i&1)<<3) | ((i&2)<<5))) | \
- (i=(x&1 ? (y&1 ? (NORTHWEST) : (SOUTHWEST)) \
- : (y&1 ? (NORTHEAST) : (SOUTHEAST))), \
- (((i&1)<<4) | ((i&2)<<6))) \
- )
-
- if (phase) {
- CAM_TABLE_LOOP(MARGOLIS_ODD)
- } else {
- CAM_TABLE_LOOP(MARGOLIS_EVEN)
- }
- }
-
- void
- n_margolis_ph()
- {
- register u_char i;
-
- /* 0 1 2 3 4 5 6 7 8 9 */
- /* c c' cw ccw opp cw' ccw' opp' pha pha' */
- /* 0x1 0x2 0x4 0x8 0x10 0x20 0x40 0x80 0x100 0x200 */
-
- #define MARGOLIS_ODD_PH (MARGOLIS_ODD | 0x100)
- #define MARGOLIS_EVEN_PH (MARGOLIS_EVEN | 0x200)
-
- if (phase) {
- CAM_TABLE_LOOP(MARGOLIS_ODD_PH)
- } else {
- CAM_TABLE_LOOP(MARGOLIS_EVEN_PH)
- }
- }
-
- void
- n_margolis_hv()
- {
- register u_char i;
-
- /* 0 1 2 3 4 5 6 7 8 9 */
- /* c c' cw ccw opp cw' ccw' opp' horz vert */
- /* 0x1 0x2 0x4 0x8 0x10 0x20 0x40 0x80 0x100 0x200 */
-
- #define MARGOLIS_ODD_HV (MARGOLIS_ODD | ((x&1)<<8) | ((y&1)<<9))
- #define MARGOLIS_EVEN_HV (MARGOLIS_EVEN | ((x&1)<<8) | ((y&1)<<9))
-
- if (phase) {
- CAM_TABLE_LOOP(MARGOLIS_ODD_HV)
- } else {
- CAM_TABLE_LOOP(MARGOLIS_EVEN_HV)
- }
- }
-
- void
- n_life()
- {
- int s;
-
- #define LIFE ( \
- ((CENTER&1) ? (((s = SUM8) == 2) || (s == 3)) \
- : (SUM8 == 3)) | \
- (CENTER<<1) \
- )
-
- CAM_LOOP(LIFE)
- }
-
- void
- n_brain()
- {
- int s;
-
- #define BRAIN ( \
- (((((s = CENTER)&3) == 0) && (SUM8 == 2)) ? 1 : 0) | \
- (s<<1) \
- )
-
- CAM_LOOP(BRAIN)
- }
-
- void
- n_heat()
- {
-
- #define HEAT ( \
- ((long)(NORTHWEST + NORTH + NORTHEAST + \
- WEST + EAST + \
- SOUTHWEST + SOUTH + SOUTHEAST + flow)) >> 3 \
- )
-
- CAM_LOOP(HEAT)
- }
-
- void
- n_dheat()
- {
- int last = 0;
-
- #define DHEAT ( \
- ((last = (long)(NORTHWEST + NORTH + NORTHEAST + \
- WEST + EAST + \
- SOUTHWEST + SOUTH + SOUTHEAST + flow \
- + (last&0x07))), last >> 3) \
- )
-
- CAM_LOOP(DHEAT)
- }
-
- void
- n_lheat()
- {
-
- #define LHEAT ( \
- ((long)(NORTH + WEST + EAST + SOUTH + flow)) >> 2 \
- )
-
- CAM_LOOP(LHEAT)
- }
-
- void
- n_ldheat()
- {
- int last = 0;
-
- #define LDHEAT ( \
- ((last = (long)(NORTH + WEST + EAST + SOUTH + flow \
- + (last&0x03))), last >> 2) \
- )
-
- CAM_LOOP(LDHEAT)
- }
-
- void
- n_anneal()
- {
- int s;
-
- #define ANNEAL ( \
- ((s = SUM9) > 5) || (s == 4) \
- )
- CAM_LOOP(ANNEAL)
- }
-
- void
- n_anneal4()
- {
- int s;
-
- #define ANNEAL4 ( \
- ((((s = SUM9p(0)) > 5) || (s == 4)) ? 1 : 0) | \
- ((((s = SUM9p(1)) > 5) || (s == 4)) ? 2 : 0) | \
- ((((s = SUM9p(2)) > 5) || (s == 4)) ? 4 : 0) | \
- ((((s = SUM9p(3)) > 5) || (s == 4)) ? 8 : 0) | \
- (CENTER << 4) \
- )
- CAM_LOOP(ANNEAL4)
- }
-
- void
- n_anneal8()
- {
- int s;
-
- #define ANNEAL8 ( \
- ((((s = SUM9p(0)) > 5) || (s == 4)) ? 1 : 0) | \
- ((((s = SUM9p(1)) > 5) || (s == 4)) ? 2 : 0) | \
- ((((s = SUM9p(2)) > 5) || (s == 4)) ? 4 : 0) | \
- ((((s = SUM9p(3)) > 5) || (s == 4)) ? 8 : 0) | \
- ((((s = SUM9p(4)) > 5) || (s == 4)) ? 16 : 0) | \
- ((((s = SUM9p(5)) > 5) || (s == 4)) ? 32 : 0) | \
- ((((s = SUM9p(6)) > 5) || (s == 4)) ? 64 : 0) | \
- ((((s = SUM9p(7)) > 5) || (s == 4)) ? 128 : 0) \
- )
- CAM_LOOP(ANNEAL8)
- }
-
- void
- n_eco()
- {
- int s;
-
- #define ANTILIFE ( \
- ((CENTER&1) ? (SUM8 != 5) \
- : (((s = SUM8) != 5) && (s != 6))) | \
- (CENTER<<1) \
- )
-
- #define ECO ( \
- (((s = SUM9p(7)) > 5) || (s == 4) ? 128 : 0) | \
- ((CENTER&128) ? ((ANTILIFE)&127) : ((BRAIN)&127)) \
- )
- CAM_LOOP(ECO)
- }
-
- void
- n_torben()
- {
- int s;
-
- /* 0 0 0 1 0 1 0 1 1 1 */
-
- #define TORBEN ( \
- ((s = SUM9) > 6) || (s == 5) || (s == 3) \
- )
- CAM_LOOP(TORBEN)
- }
-