home *** CD-ROM | disk | FTP | other *** search
Text File | 1989-10-23 | 39.2 KB | 1,314 lines |
- Path: wuarchive!cs.utexas.edu!uunet!bbn.com!rsalz
- From: rsalz@uunet.uu.net (Rich Salz)
- Newsgroups: comp.sources.unix
- Subject: v20i047: Plum-Hall benchmarks for timing common C operations
- Message-ID: <2054@papaya.bbn.com>
- Date: 24 Oct 89 00:22:51 GMT
- Lines: 1304
- Approved: rsalz@uunet.UU.NET
-
- Submitted-by: Eric S. Raymond <eric@snark.uu.net>
- Posting-number: Volume 20, Issue 47
- Archive-name: plum-benchmarks
-
- This is a simple set of benchmarks intended to give programmers timing
- information about common C operations. See the Makefile for instructions;
- basically, all you have to do is type `make', wait, and look at bench.out.
-
- A table of results for some popular machines is included in bench.tbl.
- For more information on the benchmark techniques and the philosophy
- behind them, browse the file ARTICLE. Here's an excerpt from the article:
-
- We are placing into the public domain some simple benchmarks with
- several appealing properties:
- They are short enough to type while browsing at trade shows.
-
- They are protected against overly-aggressive compiler optimizations.
-
- They reflect empirically-observed operator frequencies in C programs.
-
- They give a C programmer information directly relevant to programming.
-
- Enjoy!
- eric@snark.uu.net
-
-
- #!/bin/sh
- : "This is a shell archive, meaning: "
- : "1. Remove everything above the #! /bin/sh line. "
- : "2. Save the resulting text in a file. "
- : "3. Execute the file with /bin/sh (not csh) to create the files:"
- : " READ.ME"
- : " Makefile"
- : " benchdbl.c"
- : " benches.c"
- : " benchfn.c"
- : " benchlng.c"
- : " benchmul.c"
- : " benchreg.c"
- : " benchsho.c"
- : " clock.c"
- : " f3.c"
- : " fround.c"
- : " run-all.c"
- : " benches.out"
- : " run-all.out"
- : " run-all.bat"
- : " time-cmd.bat"
- : " time-dbl.bat"
- : " run-all.sh"
- : " cr-lf"
- : " n-n"
- : " bench.tbl"
- : " ARTICLE"
- echo file: READ.ME
- sed 's/^X//' >READ.ME << 'END-of-READ.ME'
- X The Plum-Hall Benchmarks
- X
- XThis is a simple set of benchmarks intended to give programmers timing
- Xinformation about common C operations. See the Makefile for instructions;
- Xbasically, all you have to do is type `make', wait, and look at bench.out.
- X
- XA table of results for some popular machines is included in bench.tbl.
- XFor more information on the benchmark techniques and the philosophy
- Xbehind them, browse the file ARTICLE.
- X
- X Enjoy!
- X eric@snark.uu.net
- END-of-READ.ME
- echo file: Makefile
- sed 's/^X//' >Makefile << 'END-of-Makefile'
- X#
- X# Makefile for the Plum-Hall benchmarks package
- X# by Eric S. Raymond (eric@snark.uu.net)
- X#
- X# Your system type for the benchmark list (do *not* string-quote it)
- XSYSTYPE = AT&T 6386/375
- X
- X# Comment this out if your headers already define CLOCKS_PER_SEC;
- X# otherwise set it to the number of clock() ticks per second on your system
- XCFLAGS = -DCLOCKS_PER_SEC=60
- X
- X# Comment this out if you have ANSI clock(3)
- XCLOCK = clock.o
- X
- X# Run both builds; the explicit ./ keeps this working when . is not in PATH
- Xbench.out: nbench obench
- X ./nbench 1 "$(SYSTYPE) (no -O)" >bench.out
- X ./obench 1 "$(SYSTYPE) (-O)" >>bench.out
- X
- X# optimizer off
- Xnbench.o: benches.c
- X $(CC) $(CFLAGS) -c benches.c
- X mv benches.o nbench.o
- X# link with the same $(CC) that compiled the objects
- Xnbench: nbench.o f3.o $(CLOCK)
- X $(CC) -o nbench nbench.o f3.o $(CLOCK) -lm
- X
- X# optimizer on
- Xobench.o: benches.c
- X $(CC) -O $(CFLAGS) -c benches.c
- X mv benches.o obench.o
- Xobench: obench.o f3.o $(CLOCK)
- X $(CC) -o obench obench.o f3.o $(CLOCK) -lm
- X
- Xclean:
- X rm -f [no]bench *.o bench.shar
- X
- Xbench.shar:
- X shar READ.ME Makefile *.c *.out *.bat *.sh cr-lf n-n bench.tbl ARTICLE >bench.shar
- END-of-Makefile
- echo file: benchdbl.c
- sed 's/^X//' >benchdbl.c << 'END-of-benchdbl.c'
- X/* benchdbl - benchmark for double
- X * Thomas Plum, Plum Hall Inc, 609-927-3770
- X * If machine traps overflow, use an unsigned type
- X * Let T be the execution time in milliseconds
- X * Then average time per operator = T/major usec
- X * (Because the inner loop has exactly 1000 operations)
- X */
- X#define STOR_CL auto
- X#define TYPE double
- X#include <stdio.h>
- X/* main - time the "average" operator on auto double variables
- X * av[1] is the number of major iterations; each major iteration runs the
- X * inner loop 40 times.  The first character of av[1] also seeds a and b,
- X * so their starting values are not compile-time constants.
- X * Prints the iteration count and the final value of a.
- X */
- Xmain(ac, av)
- X    int ac;
- X    char *av[];
- X    {
- X    STOR_CL TYPE a, b, c;
- X    long d, major, atol();
- X
- X    major = atol(av[1]);
- X    printf("executing %ld iterations\n", major);
- X    a = b = (av[1][0] - '0');
- X    for (d = 1; d <= major; ++d)
- X        {
- X        /* inner loop executes 1000 selected operations */
- X        for (c = 1; c <= 40; ++c)
- X            {
- X            a = a + b + c;
- X            b = a * 2;
- X            a = b / 10;
- X            a = -a;
- X            b = -a - b - c;
- X            a = b == c;
- X            b = a + c;
- X            a = !b;
- X            b = a + c;
- X            a = b > c;
- X            }
- X        }
- X    /* a is a double here; %d requires an int argument, so convert it */
- X    printf("a=%d\n", (int)a);
- X    }
- END-of-benchdbl.c
- echo file: benches.c
- sed 's/^X//' >benches.c << 'END-of-benches.c'
- X/* benches - driver for Plum Hall benchmarks */
- X#include <stdio.h>
- X#include <time.h>
- X
- Xint benchreg(), benchsho(), benchlng();
- Xint benchmul(), benchfn(), benchdbl();
- X
- Xvoid tabulate();
- Xchar *fround();
- X/* main - run all six benchmarks and print a one-row table of results
- X * usage: benches 1 'compiler-id'
- X *   argv[1] is expected to be "1"; argv[2] labels the result row
- X * exits with status 0 after printing the table, 2 on a usage error
- X */
- Xmain(argc, argv)
- X    int argc;
- X    char *argv[];
- X    {
- X    /* tabulate() stores fround()'s text (at most 39 characters) followed
- X     * by a blank and a NUL, so each row must hold more than 10 bytes */
- X    char result[6][48];
- X    int i;
- X
- X    /* only look at argv[1] when it was actually supplied */
- X    if (argc >= 2 && argv[1][0] != '1')
- X        printf("argv[1] must be 1 !\n");
- X    if (argc < 3)
- X        {
- X        fprintf(stderr, "usage: benches 1 'compiler-id'\n");
- X        exit(2);
- X        }
- X    tabulate(benchreg, result[0]);
- X    tabulate(benchsho, result[1]);
- X    tabulate(benchlng, result[2]);
- X    tabulate(benchmul, result[3]);
- X    tabulate(benchfn, result[4]);
- X    tabulate(benchdbl, result[5]);
- X    printf("\n\n");
- X    /* two heading lines, then the labelled row of six timings */
- X    printf("%20.20s %9s %9s %9s %9s %9s %9s\n",
- X        "", "register", "auto", "auto", "int", "function", "auto");
- X    printf("%20.20s %9s %9s %9s %9s %9s %9s\n",
- X        "", "int", "short", "long", "multiply", "call+ret", "double");
- X    printf("%22.22s ",
- X        argv[2]);
- X    for (i = 0; i <= 5; ++i)
- X        printf("%9.9s ", result[i]);
- X    printf("\n");
- X    exit(0);
- X    }
- X/* tabulate - time one benchmark and store its per-operation time in s
- X * fn is called as (*fn)(2, arga), with arga[1] holding the iteration count
- X * as text.  The count starts at 1 and grows tenfold until a run lasts at
- X * least one second of clock() time.  The time per operation of that last
- X * run, in microseconds, is written to s as text.
- X */
- Xvoid tabulate(fn, s)
- X    int (*fn)();    /* the benchmarks are declared as returning int */
- X    char *s;
- X    {
- X    static char arg1[20];
- X    static char *arga[3] = { "x", &arg1[0], 0 };
- X    double before, after, microsec;
- X    long major, major_next;
- X
- X    major_next = 1;
- X    do  {
- X        major = major_next;
- X        sprintf(arg1, "%ld", major);
- X        before = (double)clock();
- X        (*fn)(2, arga);
- X        after = (double)clock();
- X        major_next *= 10;
- X        /* the threshold is one second's worth of ticks, so it means the
- X         * same thing whatever the value of CLOCKS_PER_SEC */
- X        } while (after - before < CLOCKS_PER_SEC);
- X    /* ticks -> milliseconds per major iteration; a major iteration is
- X     * 1000 operations, so this is also microseconds per operation */
- X    microsec = 1e3 * (after - before) / CLOCKS_PER_SEC / major;
- X    sprintf(s, "%9s ", fround(microsec, 5, 3));
- X    }
- X
- X/* fround - round double x to precision p, n significant digits
- X * returns x rounded to n significant digits, as text with at most p digits
- X * after the decimal point; blanks are appended in place of the omitted
- X * decimals, so every result has p columns after the point
- X * uses static string for result - not re-entrant
- X * fround is an accommodation for K+R-level printf which lacks %.*e or %g
- X * slow, fat version - uses sprintf
- X */
- X#include <stdio.h>
- Xchar *fround(x, p, n)
- X    double x;
- X    short p;
- X    short n;
- X    {
- X    double y;
- X    double log10(), floor();
- X    short digs;
- X    short nlog;
- X    static char s[40] = {0};
- X    char fmt[20];
- X
- X    /* round to n significant digits by printing in %e form and reading
- X     * the text back */
- X    sprintf(fmt, "%%.%de", n-1);
- X    sprintf(s, fmt, x);
- X    sscanf(s, "%lf", &y);
- X    /* nlog is the decimal exponent of y.  It must be the floor of the
- X     * logarithm: plain truncation gives 0 instead of -1 for 0.244 and,
- X     * after a fix-up decrement, -2 instead of -1 for 0.1.  The magnitude
- X     * is used so that a negative y is not passed to log10().
- X     */
- X    if (y == 0)
- X        nlog = 0;
- X    else
- X        nlog = floor(log10(y < 0 ? -y : y));
- X    /* decimals needed for n significant digits, limited to 0..p */
- X    digs = n - nlog - 1;
- X    if (digs < 0)
- X        digs = 0;
- X    else if (digs > p)
- X        digs = p;
- X    sprintf(fmt, "%%.%df", digs);
- X    sprintf(s, fmt, y);
- X    if (digs == 0)
- X        strcat(s, ".");
- X    /* pad with one blank per decimal place not printed */
- X    while (digs++ < p)
- X        strcat(s, " ");
- X    return (s);
- X    }
- X
- X
- X
- X
- X/* Pull the six stand-alone benchmark programs into this driver.
- X * Each benchmark source defines its own main(); defining main as the
- X * benchmark's name before the #include turns that main() into an ordinary
- X * function with the name declared at the top of this file.  STOR_CL and
- X * TYPE are #undef'd between includes because most of the benchmark sources
- X * #define them afresh.
- X */
- X#define main benchreg
- X#include "benchreg.c"
- X
- X#undef main
- X#undef STOR_CL
- X#undef TYPE
- X#define main benchsho
- X#include "benchsho.c"
- X
- X#undef main
- X#undef STOR_CL
- X#undef TYPE
- X#define main benchlng
- X#include "benchlng.c"
- X
- X#undef main
- X#undef STOR_CL
- X#undef TYPE
- X#define main benchmul
- X#include "benchmul.c"
- X
- X#undef main
- X#undef STOR_CL
- X#undef TYPE
- X#define main benchfn
- X#include "benchfn.c"
- X
- X#undef main
- X#undef STOR_CL
- X#undef TYPE
- X#define main benchdbl
- X#include "benchdbl.c"
- END-of-benches.c
- echo file: benchfn.c
- sed 's/^X//' >benchfn.c << 'END-of-benchfn.c'
- X/* benchfn - benchmark for function calls
- X * Thomas Plum, Plum Hall Inc, 609-927-3770
- X * Let T be the execution time in milliseconds
- X * Then average time per operator = T/major usec
- X * (Because the inner loop has exactly 1000 operations)
- X */
- X#include <stdio.h>
- Xint dummy = 0;
- X
- X/* Call tree: f0 calls f1 9 times, f1 calls f2 10 times, f2 calls f3 10 times.
- X * One call of f0 therefore makes 1 + 9 + 90 + 900 = 1000 function calls,
- X * counting the call of f0 itself.  f3 is not defined in this file.
- X */
- Xf2() { f3();f3();f3();f3();f3();f3();f3();f3();f3();f3();} /* 10 */
- Xf1() { f2();f2();f2();f2();f2();f2();f2();f2();f2();f2();} /* 10 */
- Xf0() { f1();f1();f1();f1();f1();f1();f1();f1();f1();} /* 9 */
- X
- X/* main - time function call-and-return
- X * av[1] is the number of major iterations; each one calls f0 once.
- X * Prints the iteration count and the value of the global dummy.
- X */
- Xmain(ac, av)
- X int ac;
- X char *av[];
- X {
- X long d, major, atol();
- X
- X major = atol(av[1]);
- X printf("executing %ld iterations\n", major);
- X for (d = 1; d <= major; ++d)
- X f0(); /* executes 1000 calls */
- X /* dummy is never changed in this file; it is printed after the loop */
- X printf("dummy=%d\n", dummy);
- X }
- END-of-benchfn.c
- echo file: benchlng.c
- sed 's/^X//' >benchlng.c << 'END-of-benchlng.c'
- X/* benchlng - benchmark for long integers
- X * Thomas Plum, Plum Hall Inc, 609-927-3770
- X * If machine traps overflow, use an unsigned type
- X * Let T be the execution time in milliseconds
- X * Then average time per operator = T/major usec
- X * (Because the inner loop has exactly 1000 operations)
- X */
- X#define STOR_CL auto
- X#define TYPE long
- X#include <stdio.h>
- X/* main - time the "average" operator on auto long variables
- X * av[1] is the number of major iterations; each major iteration runs the
- X * inner loop 40 times.  The first character of av[1] also seeds a and b,
- X * so their starting values are not compile-time constants.
- X * Prints the iteration count and the final value of a.
- X */
- Xmain(ac, av)
- X    int ac;
- X    char *av[];
- X    {
- X    STOR_CL TYPE a, b, c;
- X    long d, major, atol();
- X    static TYPE m[10] = {0};
- X
- X    major = atol(av[1]);
- X    printf("executing %ld iterations\n", major);
- X    a = b = (av[1][0] - '0');
- X    for (d = 1; d <= major; ++d)
- X        {
- X        /* inner loop executes 1000 selected operations */
- X        for (c = 1; c <= 40; ++c)
- X            {
- X            a = a + b + c;
- X            b = a >> 1;
- X            a = b % 10;
- X            m[a] = a;
- X            b = m[a] - b - c;
- X            a = b == c;
- X            b = a | c;
- X            a = !b;
- X            b = a + c;
- X            a = b > c;
- X            }
- X        }
- X    /* a is a long here, so it needs %ld rather than %d */
- X    printf("a=%ld\n", a);
- X    }
- END-of-benchlng.c
- echo file: benchmul.c
- sed 's/^X//' >benchmul.c << 'END-of-benchmul.c'
- X/* benchmul - benchmark for int multiply
- X * Thomas Plum, Plum Hall Inc, 609-927-3770
- X * If machine traps overflow, use an unsigned type
- X * Let T be the execution time in milliseconds
- X * Then average time per operator = T/major usec
- X * (Because the inner loop has exactly 1000 operations)
- X */
- X#define STOR_CL auto
- X#define TYPE int
- X#include <stdio.h>
- X/* main - time integer multiplication
- X * av[1] is the number of major iterations; each major iteration runs the
- X * inner loop 40 times, and the loop body is one expression of 25 multiplies.
- X * Prints the iteration count and the final value of a.
- X */
- Xmain(ac, av)
- X int ac;
- X char *av[];
- X {
- X STOR_CL TYPE a, b, c;
- X long d, major, atol();
- X static TYPE m[10] = {0};
- X
- X major = atol(av[1]);
- X printf("executing %ld iterations\n", major);
- X /* a starts from the first character of the argument, not a constant;
- X * b and m are not used again in this program */
- X a = b = (av[1][0] - '0');
- X for (d = 1; d <= major; ++d)
- X {
- X /* inner loop executes 1000 selected operations */
- X for (c = 1; c <= 40; ++c)
- X {
- X a = 3 *a*a*a*a*a*a*a*a * a*a*a*a*a*a*a*a * a*a*a*a*a*a*a*a * a; /* 25 * */
- X }
- X }
- X /* print the result after the loop */
- X printf("a=%d\n", a);
- X }
- END-of-benchmul.c
- echo file: benchreg.c
- sed 's/^X//' >benchreg.c << 'END-of-benchreg.c'
- X/* benchreg - benchmark for register integers
- X * Thomas Plum, Plum Hall Inc, 609-927-3770
- X * If machine traps overflow, use an unsigned type
- X * Let T be the execution time in milliseconds
- X * Then average time per operator = T/major usec
- X * (Because the inner loop has exactly 1000 operations)
- X */
- X#define STOR_CL register
- X#define TYPE int
- X#include <stdio.h>
- X/* main - time the "average" operator on register int variables
- X * av[1] is the number of major iterations; each major iteration runs the
- X * inner loop 40 times.
- X * Prints the iteration count and the final value of a.
- X */
- Xmain(ac, av)
- X int ac;
- X char *av[];
- X {
- X STOR_CL TYPE a, b, c;
- X long d, major, atol();
- X static TYPE m[10] = {0};
- X
- X major = atol(av[1]);
- X printf("executing %ld iterations\n", major);
- X /* a and b start from the first character of the argument, not a constant */
- X a = b = (av[1][0] - '0');
- X for (d = 1; d <= major; ++d)
- X {
- X /* inner loop executes 1000 selected operations */
- X for (c = 1; c <= 40; ++c)
- X {
- X a = a + b + c;
- X b = a >> 1;
- X a = b % 10;
- X m[a] = a;
- X b = m[a] - b - c;
- X a = b == c;
- X b = a | c;
- X a = !b;
- X b = a + c;
- X a = b > c;
- X }
- X }
- X /* print the result after the loop */
- X printf("a=%d\n", a);
- X }
- END-of-benchreg.c
- echo file: benchsho.c
- sed 's/^X//' >benchsho.c << 'END-of-benchsho.c'
- X/* benchsho - benchmark for short integers
- X * Thomas Plum, Plum Hall Inc, 609-927-3770
- X * If machine traps overflow, use an unsigned type
- X * Let T be the execution time in milliseconds
- X * Then average time per operator = T/major usec
- X * (Because the inner loop has exactly 1000 operations)
- X */
- X#define STOR_CL auto
- X#define TYPE short
- X#include <stdio.h>
- X/* main - time the "average" operator on auto short variables
- X * av[1] is the number of major iterations; each major iteration runs the
- X * inner loop 40 times.
- X * Prints the iteration count and the final value of a.
- X */
- Xmain(ac, av)
- X int ac;
- X char *av[];
- X {
- X STOR_CL TYPE a, b, c;
- X long d, major, atol();
- X static TYPE m[10] = {0};
- X
- X major = atol(av[1]);
- X printf("executing %ld iterations\n", major);
- X /* a and b start from the first character of the argument, not a constant */
- X a = b = (av[1][0] - '0');
- X for (d = 1; d <= major; ++d)
- X {
- X /* inner loop executes 1000 selected operations */
- X for (c = 1; c <= 40; ++c)
- X {
- X a = a + b + c;
- X b = a >> 1;
- X a = b % 10;
- X m[a] = a;
- X b = m[a] - b - c;
- X a = b == c;
- X b = a | c;
- X a = !b;
- X b = a + c;
- X a = b > c;
- X }
- X }
- X /* print the result after the loop */
- X printf("a=%d\n", a);
- X }
- END-of-benchsho.c
- echo file: clock.c
- sed 's/^X//' >clock.c << 'END-of-clock.c'
- X/* clock - primitive version of ANSI 'clock' function for UNIX */
- Xlong clock()
- X {
- X struct tbuff { long pu; long ps; long cu; long cs; } tbuff;
- X
- X times(&tbuff);
- X return(tbuff.pu + tbuff.ps);
- X }
- END-of-clock.c
- echo file: f3.c
- sed 's/^X//' >f3.c << 'END-of-f3.c'
- X/* f3 - lowest level function; deliberately empty
- X * benchfn.c calls it from f2, so a call costs only the call and return.
- X * Put this in separate source file if compiler detects and optimizes
- X * useless code
- X */
- Xf3() { }
- END-of-f3.c
- echo file: fround.c
- sed 's/^X//' >fround.c << 'END-of-fround.c'
- X/* fround - round double x to precision p, n significant digits
- X * returns x rounded to n significant digits, as text with at most p digits
- X * after the decimal point; blanks are appended in place of the omitted
- X * decimals, so every result has p columns after the point
- X * uses static string for result - not re-entrant
- X * fround is an accommodation for K+R-level printf which lacks %.*e or %g
- X * slow, fat version - uses sprintf
- X */
- X#include <stdio.h>
- Xchar *fround(x, p, n)
- X    double x;
- X    short p;
- X    short n;
- X    {
- X    double y;
- X    double log10(), floor();
- X    short digs;
- X    short nlog;
- X    static char s[40] = {0};
- X    char fmt[20];
- X
- X    /* round to n significant digits by printing in %e form and reading
- X     * the text back */
- X    sprintf(fmt, "%%.%de", n-1);
- X    sprintf(s, fmt, x);
- X    sscanf(s, "%lf", &y);
- X    /* nlog is the decimal exponent of y.  It must be the floor of the
- X     * logarithm: plain truncation gives 0 instead of -1 for 0.244 and,
- X     * after a fix-up decrement, -2 instead of -1 for 0.1.  The magnitude
- X     * is used so that a negative y is not passed to log10().
- X     */
- X    if (y == 0)
- X        nlog = 0;
- X    else
- X        nlog = floor(log10(y < 0 ? -y : y));
- X    /* decimals needed for n significant digits, limited to 0..p */
- X    digs = n - nlog - 1;
- X    if (digs < 0)
- X        digs = 0;
- X    else if (digs > p)
- X        digs = p;
- X    sprintf(fmt, "%%.%df", digs);
- X    sprintf(s, fmt, y);
- X    if (digs == 0)
- X        strcat(s, ".");
- X    /* pad with one blank per decimal place not printed */
- X    while (digs++ < p)
- X        strcat(s, " ");
- X    return (s);
- X    }
- X#ifdef TRYMAIN
- X/* main - demonstration driver, compiled only when TRYMAIN is defined
- X * prints fround() of two sample values for 1 through 5 significant digits
- X */
- Xmain()
- X    {
- X    short sig;
- X
- X    sig = 1;
- X    while (sig <= 5)
- X        {
- X        printf("fround(123.57, 2, %d) = %s;\n", sig, fround(123.57, 2, sig));
- X        ++sig;
- X        }
- X    sig = 1;
- X    while (sig <= 5)
- X        {
- X        printf("fround(.013579, 5, %d) = %s;\n", sig, fround(.013579, 5, sig));
- X        ++sig;
- X        }
- X    }
- X#endif
- END-of-fround.c
- echo file: run-all.c
- sed 's/^X//' >run-all.c << 'END-of-run-all.c'
- X/* do_allbench - run all the benchmark programs */
- X#include <stdio.h>
- X#define NBENCHES 6
- X#define TIME_FMT "Current time is %lf:%lf:%lf"
- X#define CPUTIME_MIN 10000.
- X/* timings - one row per benchmark: its measured time, its program name
- X * and the two heading lines printed above its column of the results
- X */
- Xstatic struct timing
- X    {
- X    double cputime;     /* set by main() from do_all() */
- X    char *fname;        /* benchmark name passed to do_all() */
- X    char *title1;       /* first heading line */
- X    char *title2;       /* second heading line */
- X    } timings[NBENCHES] =
- X    {
- X    { 0., "benchreg", "register", "int" },
- X    { 0., "benchsho", "auto", "short" },
- X    { 0., "benchlng", "auto", "long" },
- X    { 0., "benchmul", "integer", "multiply" },
- X    { 0., "benchfn", "function", "call" },
- X    { 0., "benchdbl", "auto", "double" },
- X    };
- Xstatic char cc_cmd[BUFSIZ] = {0};
- Xstatic char command[BUFSIZ] = {0};
- X/* compile - build one benchmark
- X * formats the cc_cmd template with fname into command, runs the result
- X * with system() and returns system()'s value
- X */
- Xint compile(fname)
- X    char *fname;
- X    {
- X    int status;
- X
- X    sprintf(command, cc_cmd, fname);
- X    status = system(command);
- X    return (status);
- X    }
- X/* mk_crlf - create the file "cr-lf" containing a single newline
- X * returns 0 on success; prints a message and exits with status 2 if the
- X * file cannot be created
- X */
- Xint mk_crlf()
- X    {
- X    FILE *crlf;
- X
- X    crlf = fopen("cr-lf", "w");
- X    if (crlf == NULL)
- X        {
- X        /* name the file exactly as it was passed to fopen */
- X        fprintf(stderr, "unable to create file cr-lf\n");
- X        exit(2);
- X        }
- X    putc('\n', crlf);
- X    fclose(crlf);
- X    return (0);
- X    }
- X/* rd_time - read a time-of-day report from the file tmpname
- X * the first line of the file must match TIME_FMT (hours:minutes:seconds)
- X * returns that time of day in milliseconds; prints a message and exits
- X * with status 2 if the file cannot be opened or the line does not parse
- X */
- Xdouble rd_time(tmpname)
- X    char *tmpname;
- X    {
- X    FILE *fp;
- X    char buf[BUFSIZ];
- X    double hrs, mins, secs;
- X
- X    fp = fopen(tmpname, "r");
- X    if (fp == NULL)
- X        {
- X        fprintf(stderr, "unable to open file %s\n", tmpname);
- X        exit(2);
- X        }
- X    /* all three fields must be present, or the result would be garbage */
- X    if (fgets(buf, sizeof(buf), fp) == NULL
- X      || sscanf(buf, TIME_FMT, &hrs, &mins, &secs) != 3)
- X        {
- X        fprintf(stderr, "unable to read time from file %s\n", tmpname);
- X        exit(2);
- X        }
- X    fclose(fp);
- X    return (1000 * (secs + 60 * (mins + 60 * hrs)));
- X    }
- X/* time_it - measure the elapsed time of one run of a benchmark
- X * records the time of day in files t0 and t1 before and after running
- X * "fname iterations", and returns the difference between the two readings
- X * as computed by rd_time()
- X */
- Xdouble time_it(fname, iterations)
- X    char *fname;
- X    long iterations;
- X    {
- X    double started, finished;
- X
- X    /* reading before the run */
- X    sprintf(command, "time <cr-lf >t0");
- X    system(command);
- X    started = rd_time("t0");
- X
- X    /* the run itself */
- X    sprintf(command, "%s %ld", fname, iterations);
- X    system(command);
- X
- X    /* reading after the run */
- X    sprintf(command, "time <cr-lf >t1");
- X    system(command);
- X    finished = rd_time("t1");
- X
- X    return (finished - started);
- X    }
- X/* run - time major iterations of a benchmark, net of start-up cost
- X * times a run of zero iterations and a run of major iterations, and
- X * returns the second time minus the first
- X */
- Xdouble run(fname, major)
- X    char *fname;
- X    long major;
- X    {
- X    double overhead, total;
- X
- X    overhead = time_it(fname, 0L);
- X    total = time_it(fname, major);
- X    return (total - overhead);
- X    }
- Xdouble do_all(fname)
- X char *fname;
- X {
- X double cputime;
- X long major;
- X
- X compile(fname);
- X major = MAJOR_MIN;
- X do {
- X cputime = run(fname, major);
- X major *= 10;
- X } while (cputime < CPUTIME_MIN);
- X return (cputime / major);
- X }
- X/* main - compile, time and tabulate every benchmark in timings[]
- X * av[1] is the format string that compile() expands with each benchmark
- X * name to build the compile command
- X * prints two heading lines and one line of times; exits with status 2
- X * if av[1] is missing
- X */
- Xmain(ac, av)
- X    int ac;
- X    char *av[];
- X    {
- X    int i;
- X
- X    if (ac < 2)
- X        {
- X        fprintf(stderr, "usage: run-all 'cc-command-format'\n");
- X        exit(2);
- X        }
- X    strcpy(cc_cmd, av[1]);
- X    /* timings[] has NBENCHES entries, so valid indexes stop at NBENCHES-1 */
- X    for (i = 0; i < NBENCHES; ++i)
- X        timings[i].cputime = do_all(timings[i].fname);
- X    printf("\n\n\nRESULTS:\n\n");
- X    for (i = 0; i < NBENCHES; ++i)
- X        printf("%10s ", timings[i].title1);
- X    printf("\n");
- X    for (i = 0; i < NBENCHES; ++i)
- X        printf("%10s ", timings[i].title2);
- X    printf("\n");
- X    for (i = 0; i < NBENCHES; ++i)
- X        printf("%10.4f ", timings[i].cputime);
- X    printf("\n\n(All times are in microseconds)\n");
- X    }
- END-of-run-all.c
- echo file: benches.out
- sed 's/^X//' >benches.out << 'END-of-benches.out'
- Xexecuting 1 iterations
- Xa=0
- Xexecuting 10 iterations
- Xa=0
- Xexecuting 100 iterations
- Xa=0
- Xexecuting 1000 iterations
- Xa=0
- Xexecuting 10000 iterations
- Xa=0
- Xexecuting 1 iterations
- Xa=0
- Xexecuting 10 iterations
- Xa=0
- Xexecuting 100 iterations
- Xa=0
- Xexecuting 1000 iterations
- Xa=0
- Xexecuting 10000 iterations
- Xa=0
- Xexecuting 1 iterations
- Xa=0
- Xexecuting 10 iterations
- Xa=0
- Xexecuting 100 iterations
- Xa=0
- Xexecuting 1000 iterations
- Xa=0
- Xexecuting 10000 iterations
- Xa=0
- Xexecuting 1 iterations
- Xa=-407629151
- Xexecuting 10 iterations
- Xa=-483154367
- Xexecuting 100 iterations
- Xa=-1034506623
- Xexecuting 1000 iterations
- Xa=-1045589759
- Xexecuting 1 iterations
- Xdummy=0
- Xexecuting 10 iterations
- Xdummy=0
- Xexecuting 100 iterations
- Xdummy=0
- Xexecuting 1000 iterations
- Xdummy=0
- Xexecuting 1 iterations
- Xa=0
- Xexecuting 10 iterations
- Xa=0
- Xexecuting 100 iterations
- Xa=0
- X
- X
- X register auto auto int function auto
- X int short long multiply call+ret double
- X xenix-386 0.24 0.46 0.43 1.87 3.00 90.5
- END-of-benches.out
- echo file: run-all.out
- sed 's/^X//' >run-all.out << 'END-of-run-all.out'
- X+ cc -o benchfn.x benchfn.c
- Xbenchfn.c
- X+ time benchfn.x 1000
- Xexecuting 1000 iterations
- Xdummy=0
- X
- Xreal 3.3
- Xuser 3.1
- Xsys 0.0
- X+ cc -o benchmul.x benchmul.c
- Xbenchmul.c
- X+ time benchmul.x 10000
- Xexecuting 10000 iterations
- Xa=427469313
- X
- Xreal 19.0
- Xuser 18.8
- Xsys 0.1
- X+ cc -o benchlng.x benchlng.c
- Xbenchlng.c
- X+ time benchlng.x 10000
- Xexecuting 10000 iterations
- Xa=0
- X
- Xreal 5.1
- Xuser 5.0
- Xsys 0.0
- X+ cc -o benchsho.x benchsho.c
- Xbenchsho.c
- X+ time benchsho.x 10000
- Xexecuting 10000 iterations
- Xa=0
- X
- Xreal 5.2
- Xuser 5.1
- Xsys 0.1
- X+ cc -o benchreg.x benchreg.c
- Xbenchreg.c
- X+ time benchreg.x 10000
- Xexecuting 10000 iterations
- Xa=0
- X
- Xreal 2.6
- Xuser 2.5
- Xsys 0.0
- X+ cc -o benchdbl.x benchdbl.c
- Xbenchdbl.c
- X+ time benchdbl.x 10000
- Xexecuting 10000 iterations
- Xa=0
- X
- Xreal 15:04.5
- Xuser 15:04.4
- Xsys 0.1
- END-of-run-all.out
- echo file: run-all.bat
- sed 's/^X//' >run-all.bat << 'END-of-run-all.bat'
- Xcl benchreg.c
- Xcommand /c time-cmd benchreg >benchreg.out
- X
- Xcl benchsho.c
- Xcommand /c time-cmd benchsho >benchsho.out
- X
- Xcl benchlng.c
- Xcommand /c time-cmd benchlng >benchlng.out
- X
- Xcl benchfn.c
- Xcommand /c time-cmd benchfn >benchfn.out
- X
- Xcl benchmul.c
- Xcommand /c time-cmd benchmul >benchmul.out
- X
- Xcl benchdbl.c
- Xcommand /c time-cmd benchdbl >benchdbl.out
- X
- END-of-run-all.bat
- echo file: time-cmd.bat
- sed 's/^X//' >time-cmd.bat << 'END-of-time-cmd.bat'
- Xtime <cr-lf
- X%1 0
- Xtime <cr-lf
- X%1 10000
- Xtime <cr-lf
- END-of-time-cmd.bat
- echo file: time-dbl.bat
- sed 's/^X//' >time-dbl.bat << 'END-of-time-dbl.bat'
- Xtime <cr-lf
- Xbenchdbl 0
- Xtime <cr-lf
- Xbenchdbl 100
- Xtime <cr-lf
- END-of-time-dbl.bat
- echo file: run-all.sh
- sed 's/^X//' >run-all.sh << 'END-of-run-all.sh'
- Xcc -o benchfn.x benchfn.c
- Xtime benchfn.x 1000
- Xcc -o benchmul.x benchmul.c
- Xtime benchmul.x 10000
- Xcc -o benchlng.x benchlng.c
- Xtime benchlng.x 10000
- Xcc -o benchsho.x benchsho.c
- Xtime benchsho.x 10000
- Xcc -o benchreg.x benchreg.c
- Xtime benchreg.x 10000
- Xcc -o benchdbl.x benchdbl.c
- Xtime benchdbl.x 10000
- END-of-run-all.sh
- echo file: cr-lf
- sed 's/^X//' >cr-lf << 'END-of-cr-lf'
- X
- END-of-cr-lf
- echo file: n-n
- sed 's/^X//' >n-n << 'END-of-n-n'
- Xn
- Xn
- END-of-n-n
- echo file: bench.tbl
- sed 's/^X//' >bench.tbl << 'END-of-bench.tbl'
- XMachine/compiler register auto auto int func auto
- X int short long multiply call dbl
- X
- XAT&T 3B2/05 (-O) 1.36 3.87 2.62 15.4 7.7 22.5
- XAT&T 3B2/05 (no -O) 1.78 4.66 2.75 16.2 9.3 22.5
- XAT&T 3B2/400 (-O) 1.09 1.36 1.10 16.2 10.0(?) 91.4
- XAT&T 3B2/400 (no -O) 1.14 2.61 2.36 17.3 11.3 91.1
- XAT&T 6386/375 (no -O) 0.61 1.39 1.23 3.85 5.62 6.77
- XAT&T 6386/375 (-O) 0.52 1.17 0.54 3.68 5.78 7.68
- XApollo DN330 (-O) 1.36 .78 1.36 10.17 3.57
- XApollo DN330 (no -O) 1.54 1.28 1.54 11.30 3.64
- XApollo DN580 (-O) 1.03 .59 1.03 7.67 2.72
- XApollo DN580 (no -O) 1.18 .97 1.18 8.48 2.77
- XApollo DN660 (-O) 5.88 1.24 5.88 21.86 4.26
- XApollo DN660 (no -O) 5.93 1.52 5.93 21.93 4.29
- XMasscomp 5500 3.18 2.7 4.9 30.8 7.3
- XMasscomp 5600 (-O) .45 .61 .46 2.83 1.04
- XMasscomp 5600 (no -O) .46 .78 .64 2.99 1.76
- XPC/8088 (InstantC) 25.8 25.8 82.0 74.2 152.
- XPC/8088 (WSL 3.1 lg) 6.18 10.4 66.5 31.8 28.8
- XPyramid 90X (-O) .85 1.04 .86 3.64 1.9 2.37
- XPyramid 90X (no -O) .86 1.01 .86 3.65 1.8 2.34
- XSequent (-O) 1.39 2.99 2.53 9.90 9.3
- XSequent (no -O) 1.50 3.25 2.83 9.95 13.2
- XSun 3/260HM (-O) .31 .48 .47 1.98 1.16
- XSun 3/260HM (no -O) .36 .58 .57 1.99 1.62
- XSun 3/75M (-O) .47 .77 .76 3.00 2.12
- XSun 3/75M (no -O) .53 .95 .94 3.01 2.73
- XSun 3/75M(4.2, -O) .50 .81 .83 2.85 1.5 20.7
- XSun 3/75M(4.2, no -O) .54 1.00 1.01 2.97 2.7 21.1
- XSun 3/75M(VM, -O) .46 .77 .75 2.96 2.1 20.8
- XSun 3/75M(VM, no -O) .52 .96 .93 2.97 2.7 21.1
- XVAX 11/730 (-O) 4.00 9.80 6.20 16.2 42.8 12.4
- XVAX 11/730 (no -O) 4.73 10.2 7.45 16.57 51.5 17.0
- XVAX 11/780 (-O) 1.21 2.43 1.67 2.76 15.04 2.95
- XVAX 11/780 (BSD 4.2) 1.38 2.42 1.96 2.92 17.2
- XVAX 11/780 (UNIX 5.2) 1.24 2.48 1.79 2.72 15.7 3.89
- XVAX 11/780 (no -O) 1.29 2.51 1.85 2.70 16.7 3.89
- XVAX 11/785 (-O) .93 1.85 1.32 5.00 13.9 47.5
- XVAX 11/785 (no -O) 1.01 1.96 1.44 5.08 14.2 5.42
- XVAX 8650(UNIX -O) .236 .484 .298 .589 2.63 .578
- XVAX 8650(UNIX no -O) .258 .482 .316 .574 3.06 .791
- XVAX 8650(Ultrix -O) .23 .40 .29 .53 2.4 .56
- XVAX 8650(Ultrix no -O) .26 .41 .34 .56 2.8 .77
- END-of-bench.tbl
- echo file: ARTICLE
- sed 's/^X//' >ARTICLE << 'END-of-ARTICLE'
- X
- X
- X
- X
- X
- X
- X[The following article appeared in "C Users Journal" May 1988.
- X It describes the purpose and use of the enclosed benchmarks. ]
- X
- X
- XSIMPLE BENCHMARKS FOR C COMPILERS
- X
- Xby Thomas Plum
- X
- XDr. Plum is the author of several books on C, including Efficient C (co-
- Xauthored with Jim Brodie). He is Vice-Chair of the ANSI X3J11 Committee,
- Xand Chairman of Plum Hall Inc, which offers introductory and advanced sem-
- Xinars on C.
- X
- XCopyright (c) 1988, Plum Hall Inc
- X
- X
- XWe are placing into the public domain some simple benchmarks with several
- Xappealing properties:
- X
- X They are short enough to type while browsing at trade shows.
- X
- X They are protected against overly-aggressive compiler optimizations.
- X
- X They reflect empirically-observed operator frequencies in C programs.
- X
- X They give a C programmer information directly relevant to programming.
- X
- XIn Efficient C, Jim Brodie and I described how useful it can be for a pro-
- Xgrammer to have a general idea of how many microseconds it takes to execute
- Xthe "average operator" on register int's, on auto short's, on auto
- Xlong's, and on double data, as well as the time for an integer multiply,
- Xand the time to call-and-return from a function. These six numbers allow a
- Xprogrammer to make very good first-order estimates of the CPU time that a
- Xparticular algorithm will take.
- X
- XThe following easily-typed benchmark programs determine these times
- Xdirectly. The first one is benchreg.c ("benchmark for register opera-
- Xtors"):
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X - 1 -
- X
- X
- X
- X
- X
- X - 2 -
- X
- X
- X 1 /* benchreg - benchmark for register integers
- X 2 * Thomas Plum, Plum Hall Inc, 609-927-3770
- X 3 * If machine traps overflow, use an unsigned type
- X 4 * Let T be the execution time in milliseconds
- X 5 * Then average time per operator = T/major usec
- X 6 * (Because the inner loop has exactly 1000 operations)
- X 7 */
- X 8 #define STOR_CL register
- X 9 #define TYPE int
- X 10 #include <stdio.h>
- X 11 main(ac, av)
- X 12 int ac;
- X 13 char *av[];
- X 14 {
- X 15 STOR_CL TYPE a, b, c;
- X 16 long d, major, atol();
- X 17 static TYPE m[10] = {0};
- X 18
- X 19 major = atol(av[1]);
- X 20 printf("executing %ld iterations\n", major);
- X 21 a = b = (av[1][0] - '0');
- X 22 for (d = 1; d <= major; ++d)
- X 23 {
- X 24 /* inner loop executes 1000 selected operations */
- X 25 for (c = 1; c <= 40; ++c)
- X 26 {
- X 27 a = a + b + c;
- X 28 b = a >> 1;
- X 29 a = b % 10;
- X 30 m[a] = a;
- X 31 b = m[a] - b - c;
- X 32 a = b == c;
- X 33 b = a | c;
- X 34 a = !b;
- X 35 b = a + c;
- X 36 a = b > c;
- X 37 }
- X 38 }
- X 39 printf("a=%d\n", a);
- X 40 }
- X
- XIf you enter this and compile it to produce an executable program, you can
- Xinvoke it with one argument, the number of iterations for the major loop:
- X
- X benchreg 10000
- X
- XIf this execution takes 16 seconds, this means that the average register
- Xoperation takes 1.6 microseconds (16,000 milliseconds divided by 10,000
- Xiterations of the major loop).
- X
- XLet us examine the program in detail. Lines 8 and 9 define STOR_CL
- X("storage class") and TYPE to be register and int . Thus, on line 15,
- Xthree variables ( a , b , and c ) are declared to be of this storage class
- Xand type. At line 16, the major loop control variables are long integers,
- Xbut they are touched only one one-thousandth as often as the inner loop
- X
- X
- X
- X
- X
- X
- X
- X
- X - 3 -
- X
- X
- Xvariables, so they have little effect upon the timings. We are declaring
- Xthe atol function to return a long integer; it would otherwise default
- Xto an int return. (If we were using a compiler based upon draft ANSI C,
- Xwe could #include <stdlib.h> to get the declaration of atol , but this
- Xwould limit the applicability of the benchmarks. This simple declaration is
- Xall that even an ANSI compiler would need.)
- X
- XAt line 19, we set the major loop variable to the number given on the com-
- Xmand line, and at line 20, we confirm it to the output.
- X
- XLine 21 is crucial to preventing some overly aggressive optimizations. Ear-
- Xlier versions of these benchmarks had simply initialized a and b to 1,
- Xbut this allows a compiler to forward-propagate a known constant value. The
- Xexpression av[1][0] gives the first digit-character of the command-line
- Xargument; subtracting '0' produces a digit between 0 and 9. (Yes, the
- Xlatest ANSI draft now guarantees that the digit characters are a contiguous
- Xsequence in any environment.)
- X
- XLine 22 simply executes the major loop the number of times given by the
- Xvariable major . Line 25 repeats the inner loop 40 times, and with 25
- Xoperators in that loop, this produces 1000 operators. (Actually there are
- X1003, because of the initialization and the extra increment and test at loop
- Xcompletion. The discrepancy is well within acceptable tolerances.)
- X
- XWithin the inner loop, 40% of the operators are assignments, in keeping with
- Xthe percentages reported in the original Dhrystone work. Of the other
- Xoperators, the most frequent are plus and minus. The sequence of operations
- Xis carefully chosen to ensure that a very aggressive optimizer cannot find
- Xany useless code sections; each result depends functionally upon previous
- Xresults.
- X
- XFinally, the printout at line 39 is also important to preventing over-
- Xoptimization. If the compiler could notice that we did nothing with the
- Xcomputed result, it could discard all the operations that produced that
- Xresult.
- X
- XWe have completed our perusal of the first benchmark program, benchreg.c .
- XThe second program ( benchsho.c , for short's) is derived from benchreg.c
- Xby changing lines 8 and 9: STOR_CL becomes auto , and TYPE becomes
- Xshort . The program is otherwise unchanged.
- X
- XThe third program ( benchlng.c , for long's) is obtained by leaving
- XSTOR_CL as auto and changing TYPE to long .
- X
- XTo make the fourth program ( benchmul.c , for multiplies) we set TYPE to
- Xint , and change lines 27 through 36 to one source line which does 25 multi-
- Xplies:
- X
- X a = 3 *a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a; /* 25 * */
- X
- XThe fifth program ( benchfn.c , for functions) is a major rewrite. We
- Xarrange a series of function definitions for f3 , f2 , f1 , and f0 such
- Xthat each call to function f0 generates exactly 1000 function-call opera-
- Xtions. In case the compiler has an aggressive optimizer, move the function
- Xf3 to a separate source file, so that the compiler cannot see how useless
- X
- X
- X
- X
- X
- X
- X
- X
- X - 4 -
- X
- X
- Xit is. The global variable dummy will make the compiler think that f3
- Xmight be up to something useful. Here, then, is the benchfn.c function:
- X
- X 1 /* benchfn - benchmark for function calls
- X 2 * Thomas Plum, Plum Hall Inc, 609-927-3770
- X 3 * Let T be the execution time in milliseconds
- X 4 * Then average time per operator = T/major usec
- X 5 * (Because the inner loop has exactly 1000 operations)
- X 6 */
- X 7 #include <stdio.h>
- X 8 int dummy = 0;
- X 9
- X 10 /* f3 - lowest level function
- X 11 * Put this in separate source file if compiler detects and
- X 12 * optimizes useless code
- X 13 */
- X 14 f3() { }
- X 15
- X 16 f2() { f3();f3();f3();f3();f3();f3();f3();f3();f3();f3();} /* 10 */
- X 17 f1() { f2();f2();f2();f2();f2();f2();f2();f2();f2();f2();} /* 10 */
- X 18 f0() { f1();f1();f1();f1();f1();f1();f1();f1();f1();} /* 9 */
- X 19
- X 20 main(ac, av)
- X 21 int ac;
- X 22 char *av[];
- X 23 {
- X 24 long d, major, atol();
- X 25
- X 26 major = atol(av[1]);
- X 27 printf("executing %ld iterations\n", major);
- X 28 for (d = 1; d <= major; ++d)
- X 29 f0(); /* executes 1000 calls */
- X 30 printf("dummy=%d\n", dummy);
- X 31 }
- X
- XThe sixth program ( benchdbl.c , for double's ) is derived from benchlng.c
- Xby changing STOR_CL to auto , TYPE to double , and replacing the inner
- Xloop body with this slightly different version:
- X
- X a = a + b + c;
- X b = a * 2;
- X a = b / 10;
- X a = -a;
- X b = -a - b - c;
- X a = b == c;
- X b = a + c;
- X a = !b;
- X b = a + c;
- X a = b > c;
- X
- XThese changes are necessary because floating-point operands are not allowed
- Xfor the shift, remainder, and bitwise operators, and because the subscript
- Xoperator does not really exercise the floating-point instructions. This
- Xrevised inner loop still gives us a representative mix of typical opera-
- Xtions.
- X
- X
- X
- X
- X
- X
- X
- X
- X - 5 -
- X
- X
- XThis, then, completes our collection of six benchmark programs. After they
- Xare compiled to produce executable programs, the next question is "How do I
- Xtime the execution?"
- X
- XOn UNIX systems, the timing is easy -- just run the time command:
- X
- X $ time benchreg 10000
- X
- XThe sum of the "user" and "system" times will give the CPU time used by the
- Xprogram.
- X
- XMore accurately, we could time the execution of zero iterations, and sub-
- Xtract that time from the time for the measured number of iterations.
- X
- XOn MS-DOS systems, timings can be obtained, but with greater difficulty. If
- Xwe create a file named CR-LF which contains just one newline (or
- X"carriage-return-newline" in DOS parlance), we could time our program with a
- X"batch" file such as this:
- X
- X time <cr-lf
- X benchreg 0
- X time <cr-lf
- X benchreg 10000
- X time <cr-lf
- X
- XWe must then take times that are expressed in minutes-and-seconds and pro-
- Xduce differences expressed in seconds.
- X
- XWith whichever method, we eventually produce six numbers that are character-
- Xistic of a particular environment (a specific compiler supporting a specific
- Xmachine).
- X
- X[NOTE: Since this article appeared, I have added a driver program, benches.c.
- XIn an ANSI environment with the clock function, it will run all the tests
- Xand report the results, eliminating the need for manual computations.]
- X
- XHere are some examples of timing results that have been obtained on a
- Xvariety of minicomputer and workstation environments:
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X
- X - 6 -
- X
- X
- XMachine/compiler register auto auto int func auto
- X int short long multiply call dbl
- X
- XAT&T 3B2/05 (-O) 1.36 3.87 2.62 15.4 7.7 22.5
- XAT&T 3B2/05 (no -O) 1.78 4.66 2.75 16.2 9.3 22.5
- XAT&T 3B2/400 (-O) 1.09 1.36 1.10 16.2 10.0(?) 91.4
- XAT&T 3B2/400 (no -O) 1.14 2.61 2.36 17.3 11.3 91.1
- XApollo DN330 (-O) 1.36 .78 1.36 10.17 3.57
- XApollo DN330 (no -O) 1.54 1.28 1.54 11.30 3.64
- XApollo DN580 (-O) 1.03 .59 1.03 7.67 2.72
- XApollo DN580 (no -O) 1.18 .97 1.18 8.48 2.77
- XApollo DN660 (-O) 5.88 1.24 5.88 21.86 4.26
- XApollo DN660 (no -O) 5.93 1.52 5.93 21.93 4.29
- XCray X-MP (no vectors) .0567 .0656 .0822 .366 .821 .082
- XMasscomp 5500 3.18 2.7 4.9 30.8 7.3
- XMasscomp 5600 (-O) .45 .61 .46 2.83 1.04
- XMasscomp 5600 (no -O) .46 .78 .64 2.99 1.76
- XPyramid 90X (-O) .85 1.04 .86 3.64 1.9 2.37
- XPyramid 90X (no -O) .86 1.01 .86 3.65 1.8 2.34
- XSequent (-O) 1.39 2.99 2.53 9.90 9.3
- XSequent (no -O) 1.50 3.25 2.83 9.95 13.2
- XSun 3/260HM (-O) .31 .48 .47 1.98 1.16
- XSun 3/260HM (no -O) .36 .58 .57 1.99 1.62
- XSun 3/75M (-O) .47 .77 .76 3.00 2.12
- XSun 3/75M (no -O) .53 .95 .94 3.01 2.73
- XSun 3/75M(4.2, -O) .50 .81 .83 2.85 1.5 20.7
- XSun 3/75M(4.2, no -O) .54 1.00 1.01 2.97 2.7 21.1
- XSun 3/75M(VM, -O) .46 .77 .75 2.96 2.1 20.8
- XSun 3/75M(VM, no -O) .52 .96 .93 2.97 2.7 21.1
- XVAX 11/730 (-O) 4.00 9.80 6.20 16.2 42.8 12.4
- XVAX 11/730 (no -O) 4.73 10.2 7.45 16.57 51.5 17.0
- XVAX 11/780 (-O) 1.21 2.43 1.67 2.76 15.0 2.95
- XVAX 11/780 (BSD 4.2) 1.38 2.42 1.96 2.92 17.2
- XVAX 11/780 (UNIX 5.2) 1.24 2.48 1.79 2.72 15.7 3.89
- XVAX 11/780 (no -O) 1.29 2.51 1.85 2.70 16.7 3.89
- XVAX 11/785 (-O) .93 1.85 1.32 5.00 13.9 47.5
- XVAX 11/785 (no -O) 1.01 1.96 1.44 5.08 14.2 5.42
- XVAX 8650(UNIX -O) .236 .484 .298 .589 2.63 .578
- XVAX 8650(UNIX no -O) .258 .482 .316 .574 3.06 .791
- XVAX 8650(Ultrix -O) .23 .40 .29 .53 2.4 .56
- XVAX 8650(Ultrix no -O) .26 .41 .34 .56 2.8 .77
- X
- XNotice that some of these timings were run before the benchdbl benchmark
- Xhad been written. There are no examples of the popular PC environments in
- Xthis table. If interested readers wish to run these benchmarks on their own
- Xenvironments, I will endeavor to present these results in a future article.
- X
- XProcessor speeds are sometimes described in "MIPS" (millions of instructions
- Xper second); using a value such as the number of register operators per
- Xsecond in C might give rise to a "MOPS" measurement of more use to C pro-
- Xgrammers. Those of us who have tried these benchmarks have appreciated the
- Xintuitive grasp that they give of the speed of current machines and com-
- Xpilers. I hope that you too will find them of interest.
- X
- X
- X
- X
- X
- X
- X
- END-of-ARTICLE
- exit
- --
- Eric S. Raymond = eric@snark.uu.net (mad mastermind of TMN-Netnews)
-
-
- --
- Please send comp.sources.unix-related mail to rsalz@uunet.uu.net.
- Use a domain-based address or give alternate paths, or you may lose out.
-