\ Floating point benchmarks in ANS Forth

0 [IF]
========================================================
This file is maintained by:
...
...
===============================================================
[THEN]

DECIMAL

\ ************************************************
\ Select system to be tested, set FORTHSYSTEM
\ to value of selected target.
\ Set SPECIFICS FALSE to avoid system dependencies.
\ Set SPECIFICS TRUE to show off implementation tricks.
\ Set HACKING FALSE to use the base source code.
\ Set HACKING TRUE to optimise the source code.
\ ************************************************

\ Identifiers for the supported Forth systems.  Exactly one of them
\ ends up as the value of ForthSystem below.
 1 CONSTANT VfxForth3           \ MPE VFX Forth v3.x
 2 CONSTANT Pfw22               \ MPE ProForth 2.2
 3 CONSTANT SwiftForth20        \ FI SwiftForth 2.0
 4 CONSTANT SwiftForth15        \ FI SwiftForth 1.5
 5 CONSTANT Win32Forth          \ Win32Forth 4.2
 6 CONSTANT BigForth            \ BigForth 11 July 1999
 7 CONSTANT BigForth-Linux      \ BigForth 11 July 1999
 8 CONSTANT iForth              \ iForth 1.12 5 Aug 2001
 9 CONSTANT iForth20            \ iForth 2.0 8 June 2002
10 CONSTANT SwiftForth22        \ FI SwiftForth 2.2.2.9
11 CONSTANT gForth-fast         \ gForth 0.6.9

\ If the environment query "gforth" succeeds, its two result cells are
\ dropped and gForth-fast is selected; otherwise the system named in the
\ alternative branch is used (edit that branch to pick another target).
s" gforth" environment? [if]
  2drop gforth-fast
[else]
  iforth20
\ VfxForth3                     \ select system to test
\ SwiftForth22
\ Win32Forth
[then]
CONSTANT ForthSystem

FALSE CONSTANT specifics        \ TRUE to use system dependent code
FALSE CONSTANT hacking          \ TRUE to use "guru" level code that
                                \ makes assumptions of an optimising compiler.
TRUE CONSTANT ANSSystem         \ Some Forth 83 systems cannot compile
                                \ all the test examples without carnal
                                \ knowledge, especially if the compiler
                                \ checks control structures.

\ Print "using", then "no" when SPECIFICS is false, then "extensions".
: .specifics    ( -- )          \ display trick state
  ." using" specifics 0= IF ." no" THEN ." extensions" ;

\ Print "using", then "no" when HACKING is false, then "hackery".
: .hacking      ( -- )          \ display hack state
  ." using" hacking 0= IF ." no" THEN ." hackery" ;

\ Print both reports, joined by "and".
: .testcond     ( -- )          \ display test conditions
  .specifics ." and" .hacking ;

\ *****************************
\ VFX Forth for Windows harness
\ *****************************
\ Compiled only when ForthSystem equals VfxForth3.  Loads a floating
\ point pack and defines the helper words the benchmark code relies on.

VfxForth3 ForthSystem = [IF]

[defined] +idata [if]
  +idata                        \ enable P4 data options
  variable zzz                  \ preallocate first IDATA buffer
[then]

TRUE CONSTANT ndp?              \ -- flag ; TRUE if NDP stack version

\ Both text macros are set to the same library directory.
c" C:\Program Files\mpe\VfxEval\Lib" setmacro LibDir
c" C:\Program Files\mpe\VfxEval\Lib" setmacro NdpDir

\ ndp? chooses which of the two floating point source files is loaded.
ndp? [if]
  S" %NdpDir%\Ndp387" INCLUDED
[else]
  S" %NdpDir%\Hfp387" INCLUDED
[then]

char . dp-char !                \ select ANS number conversion
char . fp-char !
-short-branches                 \ disable short forward branches

extern: DWORD PASCAL GetTickCount( void );

\ Millisecond counter used by the timing words in the common code.
: COUNTER       \ -- ms
  GetTickCount ;

: DOC ;                         \ no-op
: ENDDOC ;                      \ no-op
: =: CONSTANT ;                 \ x "name" -- ; alias for CONSTANT
: -- POSTPONE \ ; IMMEDIATE     \ comment to end of line, like \
: F1+ 1e F+ ;                   \ F: r -- r+1
: F2DUP FOVER FOVER ;           \ F: r1 r2 -- r1 r2 r1 r2
: F2DROP FDROP FDROP ;          \ F: r1 r2 --
: FSQR FDUP F* ;                \ F: r -- r*r
: EOL ;                         \ no-op, used in MAIN's report lines
1e fatan 4e f* FCONSTANT PI     \ pi computed as 4*atan(1)
[THEN]

\ ******************************
\ iForth 2.0 for Windows harness
\ ******************************
\ Compiled only when ForthSystem equals iForth20.  Looks up GetTickCount
\ in kernel32.dll and wraps it as COUNTER; THROW aborts if either the
\ library or the symbol lookup reports an error.

iForth20 ForthSystem = [IF]
NEEDS -miscutil
NEEDS -dynlink
0 VALUE 'counter                \ holds the looked-up GetTickCount entry
S" kernel32.dll" LIBRARY-OPEN THROW ( dll) S" GetTickCount" ROT LIBRARY-FIND THROW TO 'counter
: COUNTER ( -- ms ) 0 'counter FOREIGN ;
[THEN]

\ **********************
\ SwiftForth 2.0 harness
\ **********************
\ Compiled only when ForthSystem equals SwiftForth20.  Marked as not
\ tested; it only loads two library files and defines no COUNTER here.

SwiftForth20 ForthSystem = [IF]
CR .( Not tested )
include C:\program Files\SwiftForth20\Lib\Options\fpmath.f
include C:\Program Files\SwiftForth20\Lib\FSLib\Library\fsl-util.f
[THEN]

\ **********************
\ SwiftForth 2.2 harness
\ **********************
\ Compiled only when ForthSystem equals SwiftForth22.  Marked as not
\ tested; it only loads two library files and defines no COUNTER here.

SwiftForth22 ForthSystem = [IF]
CR .( Not tested )
\ FPCONFIG.F should be in the BENCHMRK folder
include C:\Program files\SwiftForth2229\Lib\Options\fpmath.f
include C:\Program Files\SwiftForth2229\Unsupported\FSLib\Library\fsl-util.f
[THEN]

\ ******************
\ Win32Forth harness
\ ******************
\ Compiled only when ForthSystem equals Win32Forth.  Marked as not tested.

Win32Forth ForthSystem = [IF]
CR .( Not tested )
: COUNTER       \ -- ms
  Call GetTickCount ;
[THEN]

\ **************
\ gforth harness
\ **************
\ Compiled only when ForthSystem equals gForth-fast; the body of this
\ conditional section follows.

gForth-fast ForthSystem = [IF]
\ --- gforth harness body (inside the gForth-fast conditional section) ---

\ Output column tracking: OUT counts the characters sent since the last
\ CR.  EMIT and TYPE are revectored to words that update it.
variable out            \ -- addr

: temit                 \ char --
  1 out +! (emit) ;
' temit is emit

: ttype                 \ addr len --
  dup out +! (type) ;
' ttype is type

: cr                    \ --
  cr out off ;          \ calls the previous CR, then resets the column

: >pos                  \ n -- ; step to position n
  out @ - spaces ;

decimal

0 CONSTANT U>D          \ u -- ud ; pushes 0 as the high cell

\ Sums the two doubles returned by cputime and divides by 1000.
\ NOTE(review): assumes cputime reports user and system time in
\ microseconds, so the result is CPU milliseconds -- confirm in the
\ gforth manual.
: counter               \ -- ms
  cputime d+ 1000 um/mod nip ;

create pocket 256 allot \ buffer for interpreted counted strings

\ Compiling: behaves as the system's c" .  Interpreting: parses up to
\ the closing quote, stores it in POCKET and returns that address.
: c"                    \ -- [comp] ; -- addr [interp]
  state @
  if    postpone c"
  else  [char] " parse pocket place pocket
  endif ; immediate

: [o/n] ; IMMEDIATE     \ no-op

: M/                    \ d n1 -- quot
  sm/rem nip ;

: buffer:               \ n -- ; -- addr
  create here over allot swap erase ;   \ n zero-filled bytes

: 2-                    \ n -- n-2
  s" 2 -" evaluate ; immediate

: u2/                   \ u -- u'
  s" 1 RSHIFT" evaluate ; immediate

: not                   \ x -- x'
  s" invert" evaluate ; immediate

\ Dummies for Windows-specific names: SendMessage drops its four
\ arguments and returns 0.
0 constant HWND_DESKTOP
16 constant WM_CLOSE
: SendMessage           \ h m w l -- flag
  2drop 2drop 0 ;

: EOL ;                         \ no-op, used in MAIN's report lines
: DOC ;                         \ no-op
: ENDDOC ;                      \ no-op
: =: CONSTANT ;                 \ x "name" -- ; alias for CONSTANT
: -- POSTPONE \ ; IMMEDIATE     \ comment to end of line, like \
: F1+ 1e F+ ;                   \ F: r -- r+1
: F2DUP FOVER FOVER ;           \ F: r1 r2 -- r1 r2 r1 r2
: F2DROP FDROP FDROP ;          \ F: r1 r2 --
: FSQR FDUP F* ;                \ F: r -- r*r

\ A floating point value: the created word returns its stored float.
: FVALUE ( r "name" -- ) create f, does> f@ ;

\ Compile-time store into an FVALUE: compiles the body address of "name"
\ as a literal followed by F! .
: [fto] ( r "name" -- ) ' >body postpone literal postpone f! ; immediate compile-only

: s>f ( n -- r ) s>d d>f ;
: f>s ( r -- n ) f>d drop ;
[THEN]

\ ********************
\ Start of common code
\ ********************
\ Timing helpers built on COUNTER ( -- ms ), which each harness supplies.

0 VALUE _time_                  \ COUNTER reading taken by TIMER-RESET

\ Accumulated elapsed milliseconds, added to by .ELAPSED.  ANS Forth
\ does not initialise VARIABLE contents, so it is zeroed explicitly.
VARIABLE TotalTime   0 TotalTime !

\ Remember the current COUNTER reading as the start of an interval.
: TIMER-RESET   ( -- )
  COUNTER TO _time_ ;

\ Pictured-output helper: convert one more digit, or hold a blank once
\ the remaining number is zero (blank-pads the leading positions).
: #?            ( d1 -- d2 )
  2DUP OR 0= IF BL HOLD ELSE # THEN ;

\ Print a millisecond count as seconds with three decimals, i.e. up to
\ three blank-padded integer digits, a point and the three millisecond
\ digits, then a blank and the text "seconds".
: .secs         ( ms -- )
  0 <# BL HOLD # # # [CHAR] . HOLD # #? #? #> TYPE ." seconds" ;

\ Print the time since TIMER-RESET and add it to TotalTime.
: .ELAPSED      ( -- )
  COUNTER _time_ - DUP TotalTime +! .secs ;

\ Milliseconds elapsed since TIMER-RESET.
: MS?           ( -- ms )
  COUNTER _time_ - ;

\ ==============================================================
\ ============ Finally -- the benchmark code ===================
\ ==============================================================

(
 *
 * LANGUAGE    : ANS Forth
 * PROJECT     : Forth Environments
 * DESCRIPTION : Estimate MFLOPS rating
 * CATEGORY    : Benchmark
 * AUTHOR      : Marcel Hendrix
 * LAST CHANGE : January 13, 2001, Marcel Hendrix
 *
)

\ ANEW -flops

0 [IF]
Flops.c is a 'c' program which attempts to estimate your system's
floating-point 'MFLOPS' rating for the FADD, FSUB, FMUL, and FDIV
operations based on specific 'instruction mixes' (discussed below).
The program provides an estimate of PEAK MFLOPS performance by making
maximal use of register variables with minimal interaction with main
memory. The execution loops are all small so that they will fit in
any cache. Flops.c can be used along with Linpack and the Livermore
kernels (which exercise memory much more extensively) to gain further
insight into the limits of system performance. The flops.c execution
modules also include various percent weightings of FDIV's (from 0% to
25% FDIV's) so that the range of performance can be obtained when
using FDIV's. FDIV's, being computationally more intensive than FADD's
or FMUL's, can impact performance considerably on some systems.

Flops.c consists of 8 independent modules (routines) which, except for
module 2, conduct numerical integration of various functions. Module 2
estimates the value of pi based upon the Maclaurin series expansion of
atan(1). MFLOPS ratings are provided for each module, but the program's
overall results are summarized by the MFLOPS(1), MFLOPS(2), MFLOPS(3),
and MFLOPS(4) outputs.

The MFLOPS(1) result is identical to the result provided by all
previous versions of flops.c. It is based only upon the results from
modules 2 and 3. Two problems surfaced in using MFLOPS(1).
First, it was difficult to completely 'vectorize' the result due to the
recurrence of the 's' variable in module 2. This problem is addressed in
the MFLOPS(2) result which does not use module 2, but maintains nearly
the same weighting of FDIV's (9.2%) as in MFLOPS(1) (9.6%).

The second problem with MFLOPS(1) centers around the percentage of
FDIV's (9.6%) which was viewed as too high for an important class of
problems. This concern is addressed in the MFLOPS(3) result, which does
only 3.4% FDIV's, and in the MFLOPS(4) result, where NO FDIV's are
conducted at all.

The number of floating-point instructions per iteration (loop) is given
below for each module executed:

  MODULE   FADD   FSUB   FMUL   FDIV   TOTAL  Comment
    1        7      0      6      1      14   7.1%  FDIV's
    2        3      2      1      1       7   difficult to vectorize.
    3        6      2      9      0      17   0.0%  FDIV's
    4        7      0      8      0      15   0.0%  FDIV's
    5       13      0     15      1      29   3.4%  FDIV's
    6       13      0     16      0      29   0.0%  FDIV's
    7        3      3      3      3      12   25.0% FDIV's
    8       13      0     17      0      30   0.0%  FDIV's

  A*2+3     21     12     14      5      52   A=5, MFLOPS(1), Same as
           40.4%  23.1%  26.9%   9.6%         previous versions of the
                                              flops.c program. Includes
                                              only Modules 2 and 3, does
                                              9.6% FDIV's, and is not
                                              easily vectorizable.

  1+3+4     58     14     66     14     152   A=4, MFLOPS(2), New output
  +5+6+    38.2%   9.2%  43.4%   9.2%         does not include Module 2,
  A*7                                         but does 9.2% FDIV's.

  1+3+4     62      5     74      5     146   A=0, MFLOPS(3), New output
  +5+6+    42.5%   3.4%  50.7%   3.4%         does not include Module 2,
  7+8                                         but does 3.4% FDIV's.

  3+4+6     39      2     50      0      91   A=0, MFLOPS(4), New output
  +8       42.9%   2.2%  54.9%   0.0%         does not include Module 2,
                                              and does NO FDIV's.

NOTE: Various timer routines are included as indicated below. The
      timer routines, with some comments, are attached at the end
      of the main program.

NOTE: Please do not remove any of the printouts.

EXAMPLE COMPILATION:
UNIX based systems
    cc -DUNIX -O flops.c -o flops
    cc -DUNIX -DROPT flops.c -o flops
    cc -DUNIX -fast -O4 flops.c -o flops
    .
    .
    .
  etc.

Al Aburto
aburto@nosc.mil
[THEN]

\ Polynomial coefficients A0..A6.  The modules evaluate them in nested
\ (Horner) form in the squared argument and multiply by the argument to
\ approximate the sine-type factor of each integrand.  A3 and A5 are
\ FVALUEs because MODULE-4 negates them and MAIN restores them.
1e                      FCONSTANT A0
-0.1666666666671334e    FCONSTANT A1
0.833333333809067e-2    FCONSTANT A2
0.198412715551283e-3    FVALUE    A3 -- changeable ...
0.27557589750762e-5     FCONSTANT A4
0.2507059876207e-7      FVALUE    A5 -- changeable ...
0.164105986683e-9       FCONSTANT A6

\ Polynomial coefficients B0..B6, evaluated by the modules in nested
\ form in the squared argument for the cosine-type factor.
1e                      FCONSTANT B0
-0.4999999999982e       FCONSTANT B1
0.4166666664651e-1      FCONSTANT B2
-0.1388888805755e-2     FCONSTANT B3
0.24801428034e-4        FCONSTANT B4
-0.2754213324e-6        FCONSTANT B5
0.20189405e-8           FCONSTANT B6

\ Coefficients of the rational function integrated by MODULE-1.
0.3999999946405e-1      FCONSTANT D1
0.96e-3                 FCONSTANT D2
0.1233153e-5            FCONSTANT D3

0.48e-3                 FCONSTANT E2
0.411051e-6             FCONSTANT E3

(
 * **************************************************
   Set Variable Values.
   T[1] references all timing results relative to
   one million loops.

   The program will execute from 31250 to 512000000
   loops based on a runtime of Module 1 of at least
   TLimit = 15.0 seconds. That is, a runtime of 15
   seconds for Module 1 is used to determine the
   number of loops to execute.

   No more than NLimit = 512000000 loops are allowed

   NOTE: the figures above are those of the original
   description. The definitions that follow set TLimit
   to 1.5 seconds and NLimit to 51200000 loops.
   **************************************************
 *
)

    1500 =: TLimit      -- 1.5 seconds ( was 15000 )
51200000 =: NLimit      -- maximum number of loops ( was 512000000 )
   15625 =: loops       -- Initial number of loops, DO NOT CHANGE!

\ n FVALUES name1 .. namen  defines n floating point values, each
\ initialised to 0e; the names are parsed from the text that follows.
: FVALUES       ( n "name1" ... "namen" -- )
  0 ?DO 0e FVALUE LOOP ;

\ Result slots T[0]..T[35]; the modules and the MFLOPS(n) words store
\ run times, time per operation and MFLOPS figures here.
8 FVALUES T[0]  T[1]  T[2]  T[3]  T[4]  T[5]  T[6]  T[7]
8 FVALUES T[8]  T[9]  T[10] T[11] T[12] T[13] T[14] T[15]
8 FVALUES T[16] T[17] T[18] T[19] T[20] T[21] T[22] T[23]
8 FVALUES T[24] T[25] T[26] T[27] T[28] T[29] T[30] T[31]
4 FVALUES T[32] T[33] T[34] T[35]

0e FVALUE scale         \ 1e6 / loop count; set by MAIN and MODULE-1
0e FVALUE nulltime      \ scaled time of an empty loop; set by MODULE-1
loops VALUE m           \ iteration count used by modules 2 to 8

6 FVALUES sa xx uu ss vv ww     \ work variables shared by the modules

-- There exists a magical n for which FLOPS is much, much, faster...
-- Use findbest.frt ( set Nlimit lower )

\ Time since TIMER-RESET, converted from milliseconds to seconds.
: FMS?          ( F: -- time )
  MS? S>F 1e-3 F* ;

\ The same time multiplied by SCALE, with NULLTIME subtracted.
: SFMS?         ( F: -- stime )
  FMS? scale F* nulltime F- ;

\ ( * *************************************************
\ Module 1. Calculate integral of df(x)/f(x) defined
\ below. Result is ln(f(1)). There are 14
\ double precision operations per loop
\ ( 7 +, 0 -, 6 *, 1 / ) that are included
\ in the timing.
\ 50.0% +, 00.0% -, 42.9% *, and 07.1% /
\ ************************************************* * )

\ Calibrates the loop count and times module 1.
\ The locals are n (loop count, starts at LOOPS) and sa (elapsed ms,
\ starts at 0); this local sa hides the FVALUE of the same name inside
\ this word.  n is doubled and the timed loop repeated until a pass
\ takes at least TLimit ms or n reaches NLimit.  Afterwards the word
\ sets SCALE and T[1] to 1e6/n, measures an empty loop into NULLTIME,
\ stores the results in T[2]..T[4], derives M for the later modules and
\ prints the result row for module 1.
: MODULE-1      ( -- )
  loops 0 LOCALS| sa n |
  1e 0e 0e 0e 0e [FTO] xx [FTO] uu [FTO] ss [FTO] vv [FTO] ww
  BEGIN sa TLimit <
  WHILE
    0e [FTO] ss 0e [FTO] vv 1e [FTO] ww
    n 2* DUP TO n S>F 1/F [FTO] xx      \ step width xx = 1/n
    TIMER-RESET
    n 1 DO                              \ n-1 passes
      ww vv F+ [FTO] vv
      vv xx F* ( uu )
      D3 FOVER F* D2 F+ FOVER F* D1 F+ FSWAP ( t uu )
      E3 FOVER F* E2 F+ FOVER F* D1 F+ F* ww F+ F/
      ss F+ [FTO] ss
    LOOP
    MS? TO sa
    n NLimit >=
  UNTIL THEN
  1e6 n S>F F/ FDUP [FTO] scale [FTO] T[1]
  TIMER-RESET n 0 DO LOOP               \ empty loop for the null time
  FMS? scale F* 0e FMAX [FTO] nulltime
  sa S>F 1e-3 F* scale F* nulltime F- [FTO] T[2]
  T[2] 14e ( #flops?) F/ FDUP [FTO] T[3] 1/F [FTO] T[4]
  D1 D2 F+ D3 F+ D1 E2 F+ E3 F+ F1+ F/ D1 F+ ss F2* F+ F2/ xx F* 1/F
  FDUP 40e3 F* scale F/ F>S TO m ( note: m is multiple of 4 )
  ( sb) 25.2e F- ( error)
  ." 1 " ( error) FE. 7 SPACES T[2] FE. 5 SPACES T[4] FE. 5 SPACES ;

\ ( * *****************************************************
\ Module 2. Calculate value of PI from Taylor Series
\ expansion of atan(1.0). There are 7
\ double precision operations per loop
\ ( 3 +, 2 -, 1 *, 1 / ) that are included
\ in the timing.
\ 42.9% +, 28.6% -, 14.3% *, and 14.3% /
\ ***************************************************** * )

\ Times a reference loop into T[5], then the main loop into T[6].
\ The main loop keeps -5e 5e and the running uu on the float stack and
\ is unrolled twice: the first half uses 5e, the second half -5e, so
\ each pass covers two steps and the loop runs m/2 times.  T[7] and
\ T[8] receive the time per operation and its reciprocal.  M is
\ recomputed from the result, and the printed error is the computed
\ value minus PI.
: MODULE-2      ( -- )
  1e 0e 0e 0e 0e -1e [FTO] sa [FTO] xx [FTO] uu [FTO] ss [FTO] vv [FTO] ww
  TIMER-RESET ( note: m is multiple of 4 )
  m 2/ 0 DO 2e uu F+ [FTO] uu 2e uu F+ [FTO] uu LOOP
  FMS? scale F* 0e FMAX [FTO] T[5]
  sa [FTO] uu 0e [FTO] vv 0e [FTO] ww 0e [FTO] xx
  TIMER-RESET
  -5e 5e uu
  m 2/ 0 DO
    2e F+
    F2DUP F- xx F+ [FTO] xx
    2 FPICK FOVER F* vv F+ [FTO] vv
    F2DUP F/ ww F+ [FTO] ww
    2e F+
    2 FPICK FOVER F- xx F+ [FTO] xx
    F2DUP F* vv F+ [FTO] vv
    2 FPICK FOVER F/ ww F+ [FTO] ww ( a b c -- a b c a/c )
  LOOP FDROP F2DROP
  FMS? scale F* [FTO] T[6]
  T[6] T[5] F- 7e ( #flops) F/ FDUP [FTO] T[7] 1/F [FTO] T[8]
  sa xx F* m S>F F/ F>S TO m
  ww 4e F* 5e F/ 5e vv F/ F+ ( sb) 31.25e vv FDUP FSQR F* F/ F- PI F- ( pi_error)
  CR ." 2 " FE. 7 SPACES T[6] T[5] F- FE. 5 SPACES T[8] FE. 5 SPACES ;

\ ( * ******************************************************
\ Module 3. Calculate integral of sin(x) from 0.0 to
\ PI/3.0 using Trapezoidal Method. Result
\ is 0.5. There are 17 double precision
\ operations per loop (6 +, 2 -, 9 *, 0 /)
\ included in the timing.
\ 35.3% +, 11.8% -, 52.9% *, and 00.0% /
\ ****************************************************** * )

\ Step width xx = PI/(3*m).  The loop sums the A-polynomial at the m-1
\ interior points; A3 and A5 enter with F- here.  The closing
\ expression adds the end point term, scales by xx/2 and subtracts the
\ expected 0.5.  Results go to T[9]..T[11].
: MODULE-3      ( -- )
  PI m 3 * S>F F/ 0e 0e 0e 0e [FTO] uu [FTO] ss [FTO] vv [FTO] ww [FTO] xx
  TIMER-RESET
  m 1 DO
    1e vv F+ [FTO] vv
    vv xx F* [FTO] uu
    uu FSQR
    A6 FOVER F* A5 F- FOVER F* A4 F+ FOVER F* A3 F- FOVER F* A2 F+ FOVER F* A1 F+ F* F1+
    uu F* ss F+ [FTO] ss
  LOOP
  SFMS? FDUP [FTO] T[9] 17e F/ FDUP [FTO] T[10] 1/F [FTO] T[11]
  PI 3e F/ [FTO] uu uu FSQR [FTO] ww
  A6 ww F* A5 F- ww F* A4 F+ ww F* A3 F- ww F* A2 F+ ww F* A1 F+ ww F* 1e F+ uu F* ( sa)
  ss F2* F+ F2/ xx F* 0.5e F- ( error )
  CR ." 3 " FE. 7 SPACES T[9] FE. 5 SPACES T[11] FE. 5 SPACES ;

\ ( * ***********************************************************
\ Module 4. Calculate Integral of cos(x) from 0.0 to PI/3
\ using the Trapezoidal Method. Result is
\ sin(PI/3). There are 15 double precision
\ operations per loop (7 +, 0 -, 8 *, and 0 / )
\ included in the timing.
\ 46.7% +, 00.0% -, 53.3% *, 00.0% /
\ *********************************************************** * )

\ First negates A3 and A5 (MAIN restores them before each run), so
\ that from here on every A coefficient is combined with F+.  The loop
\ keeps the point index on the float stack and sums the B-polynomial;
\ the error is the trapezoidal result minus the A-polynomial value at
\ PI/3.  Results go to T[12]..T[14].
: MODULE-4      ( -- )
  A3 FNEGATE [FTO] A3 A5 FNEGATE [FTO] A5
  PI m 3 * S>F F/ 0e 0e 0e 0e [FTO] uu [FTO] ss [FTO] vv [FTO] ww [FTO] xx
  TIMER-RESET
  0e m 1 DO
    F1+ FDUP xx F* FSQR
    B6 FOVER F* B5 F+ FOVER F* B4 F+ FOVER F* B3 F+ FOVER F* B2 F+ FOVER F* B1 F+ F* F1+
    ss F+ [FTO] ss
  LOOP FDROP
  SFMS? FDUP [FTO] T[12] 15e F/ FDUP [FTO] T[13] 1/F [FTO] T[14]
  PI 3e F/ [FTO] uu uu FSQR [FTO] ww
  B6 ww F* B5 F+ ww F* B4 F+ ww F* B3 F+ ww F* B2 F+ ww F* B1 F+ ww F* F1+ ( sa)
  ss F2* F1+ F+ F2/ xx F* ( sa )
  A6 ww F* A5 F+ ww F* A4 F+ ww F* A3 F+ ww F* A2 F+ ww F* A1 F+ ww F* A0 F+ uu F* ( sb)
  F- ( error )
  CR ." 4 " FE. 7 SPACES T[12] FE. 5 SPACES T[14] FE. 5 SPACES ;

\ ( * ***********************************************************
\ Module 5. Calculate Integral of tan(x) from 0.0 to PI/3
\ using the Trapezoidal Method. Result is
\ -ln(cos(PI/3)) = ln(2). There are 29 double precision
\ operations per loop (13 +, 0 -, 15 *, and 1 /)
\ included in the timing.
\ 44.8% +, 00.0% -, 51.7% *, and 03.4% /
\ *********************************************************** * )

\ Each pass divides the A-polynomial value by the B-polynomial value at
\ the current point.  The expected value subtracted at the end is
\ 0.6931471805599453.  Results go to T[15]..T[17].
: MODULE-5      ( -- )
  PI m 3 * S>F F/ 0e 0e 0e 0e [FTO] uu [FTO] ss [FTO] vv [FTO] ww [FTO] xx
  TIMER-RESET
  0e m 1 DO
    F1+ FDUP xx F* FDUP FSQR FSWAP FOVER ( ww uu ww )
    A6 FOVER F* A5 F+ FOVER F* A4 F+ FOVER F* A3 F+ FOVER F* A2 F+ FOVER F* A1 F+ F* F1+ F* ( ww vv )
    FSWAP
    B6 FOVER F* B5 F+ FOVER F* B4 F+ FOVER F* B3 F+ FOVER F* B2 F+ FOVER F* B1 F+ F* F1+
    F/ ss F+ [FTO] ss
  LOOP FDROP
  SFMS? FDUP [FTO] T[15] 29e F/ FDUP [FTO] T[16] 1/F [FTO] T[17]
  PI 3e F/ [FTO] uu uu FSQR [FTO] ww
  A6 ww F* A5 F+ ww F* A4 F+ ww F* A3 F+ ww F* A2 F+ ww F* A1 F+ ww F* F1+ uu F* ( sa )
  B6 ww F* B5 F+ ww F* B4 F+ ww F* B3 F+ ww F* B2 F+ ww F* B1 F+ ww F* F1+ F/ ( sa/sb )
  ss F2* F+ F2/ xx F* 0.6931471805599453e F- ( error )
  CR ." 5 " FE. 7 SPACES T[15] FE. 5 SPACES T[17] FE. 5 SPACES ;

\ ( * ***********************************************************
\ Module 6. Calculate Integral of sin(x)*cos(x) from 0.0
\ to PI/4 using the Trapezoidal Method. Result
\ is sin(PI/4)^2/2 = 0.25. There are 29 double precision
\ operations per loop (13 +, 0 -, 16 *, and 0 /)
\ included in the timing.
\ 44.8% +, 00.0% -, 55.2% *, and 00.0% /
\ *********************************************************** * )

\ Same structure as module 5, but the two polynomial values are
\ multiplied and the step width is PI/(4*m).  The expected value
\ subtracted at the end is 0.25.  Results go to T[18]..T[20].
: MODULE-6      ( -- )
  PI m 4 * S>F F/ 0e 0e 0e 0e [FTO] uu [FTO] ss [FTO] vv [FTO] ww [FTO] xx
  TIMER-RESET
  0e m 1 DO
    F1+ FDUP xx F* FDUP FSQR ( uu ww ) FSWAP FOVER ( ww uu ww )
    A6 FOVER F* A5 F+ FOVER F* A4 F+ FOVER F* A3 F+ FOVER F* A2 F+ FOVER F* A1 F+ F* F1+ F* ( vv )
    FSWAP
    B6 FOVER F* B5 F+ FOVER F* B4 F+ FOVER F* B3 F+ FOVER F* B2 F+ FOVER F* B1 F+ F* F1+
    F* ss F+ [FTO] ss
  LOOP FDROP
  SFMS? FDUP [FTO] T[18] 29e F/ FDUP [FTO] T[19] 1/F [FTO] T[20]
  PI 4e F/ [FTO] uu uu FSQR [FTO] ww
  A6 ww F* A5 F+ ww F* A4 F+ ww F* A3 F+ ww F* A2 F+ ww F* A1 F+ ww F* F1+ uu F* ( sa )
  B6 ww F* B5 F+ ww F* B4 F+ ww F* B3 F+ ww F* B2 F+ ww F* B1 F+ ww F* F1+ F* ( sa*sb )
  ss F2* F+ F2/ xx F* 0.25e F- ( error )
  CR ." 6 " FE. 7 SPACES T[18] FE. 5 SPACES T[20] FE. 5 SPACES ;

\ ( * ******************************************************
\ Module 7. Calculate value of the definite integral
\ from 0 to sa of 1/(x+1), x/(x*x+1), and
\ x*x/(x*x*x+1) using the Trapezoidal Rule.
\ There are 12 double precision operations
\ per loop ( 3 +, 3 -, 3 *, and 3 / ) that
\ are included in the timing.
\ 25.0% +, 25.0% -, 25.0% *, and 25.0% /
\ ****************************************************** * )

\ Upper limit sa = 102.3321513995275, step width vv = sa/m, ww = 1.
\ Each pass subtracts the three quotients from ss.  Afterwards the
\ result is stored back in sa, M is recomputed from it as the final
\ iteration count reported by MAIN, and the printed error is
\ sa + 500.2.  Results go to T[21]..T[23].
: MODULE-7      ( -- )
  0e 0e 0e 0e 1e 102.3321513995275e
  [FTO] sa [FTO] ww [FTO] ss [FTO] uu [FTO] vv [FTO] xx
  sa m S>F F/ [FTO] vv
  TIMER-RESET
  0e m 1 DO
    F1+ FDUP vv F* FDUP [FTO] xx ( xx) FSQR [FTO] uu
    ss
    ww FDUP xx F+ F/
    xx uu ww F+ F/ F+
    uu FDUP xx F* ww F+ F/ F+
    F- [FTO] ss
  LOOP FDROP
  SFMS? FDUP [FTO] T[21] 12e F/ FDUP [FTO] T[22] 1/F [FTO] T[23]
  sa [FTO] xx xx FSQR [FTO] uu
  ww ww xx ww F+ F/ F+ xx uu ww F+ F/ F+ uu xx uu F* ww F+ F/ F+ FNEGATE ( sa)
  ss F2* F+ vv F* 18e F* [FTO] sa
  sa -2000e F* scale F/ F>S TO m
  sa 500.2e F+ ( error)
  CR ." 7 " FE. 7 SPACES T[21] FE. 5 SPACES T[23] FE. 5 SPACES ;

\ ( * ***********************************************************
\ Module 8. Calculate Integral of sin(x)*cos(x)*cos(x)
\ from 0 to PI/3 using the Trapezoidal Method.
\ Result is (1-cos(PI/3)^3)/3.
\ There are 30 double precision operations per loop included
\ in the timing:
\ 13 +, 0 -, 17 * 0 /
\ 43.3% +, 00.0% -, 56.7% *, and 00.0% /
\ *********************************************************** * )

\ Each pass multiplies the A-polynomial value by the square of the
\ B-polynomial value at the current point.  The expected value
\ subtracted at the end is 0.29166666666666667.  Results go to
\ T[24]..T[26].
: MODULE-8      ( -- )
  PI m 3 * S>F F/ 0e 0e 0e 0e [FTO] uu [FTO] ss [FTO] vv [FTO] ww [FTO] xx
  TIMER-RESET
  0e m 1 DO
    F1+ FDUP xx F* FDUP FSQR ( uu ww )
    B6 FOVER F* B5 F+ FOVER F* B4 F+ FOVER F* B3 F+ FOVER F* B2 F+ FOVER F* B1 F+ FOVER F* F1+ FSQR ( vv^2 )
    FSWAP
    A6 FOVER F* A5 F+ FOVER F* A4 F+ FOVER F* A3 F+ FOVER F* A2 F+ FOVER F* A1 F+ F* F1+ ( uu vv^2 w )
    F* F* ss F+ [FTO] ss
  LOOP FDROP
  SFMS? FDUP [FTO] T[24] 30e F/ FDUP [FTO] T[25] 1/F [FTO] T[26]
  PI 3e F/ [FTO] uu uu FSQR [FTO] ww
  A6 ww F* A5 F+ ww F* A4 F+ ww F* A3 F+ ww F* A2 F+ ww F* A1 F+ ww F* F1+ uu F* ( sa )
  B6 ww F* B5 F+ ww F* B4 F+ ww F* B3 F+ ww F* B2 F+ ww F* B1 F+ ww F* F1+ FSQR F* ( sa*sb^2 )
  ss F2* F+ F2/ xx F* 0.29166666666666667e F- ( error )
  CR ." 8 " FE. 7 SPACES T[24] FE. 5 SPACES T[26] FE. 5 SPACES ;

\ *************************************************
\ MFLOPS(1) output. This is the same weighting
\ used for all previous versions of the flops.c
\ program. Includes Modules 2 and 3 only.
\ *************************************************

\ Weighted time per operation: ( 5 * (T[6]-T[5]) + T[9] ) / 52, stored
\ in T[27].  Its reciprocal is stored in T[28] and left on the float
\ stack as the MFLOPS figure.
: MFLOPS(1)     ( F: -- mflops )
  T[6] T[5] F- 5e F* T[9] F+ 52e F/
  FDUP [FTO] T[27] 1/F FDUP [FTO] T[28] ;

\ *************************************************
\ MFLOPS(2) output. This output does not include
\ Module 2, but it still does 9.2% FDIV's.
\ *************************************************

\ Weighted time per operation:
\ ( T[2] + T[9] + T[12] + T[15] + T[18] + 4 * T[21] ) / 152, stored in
\ T[29].  Its reciprocal is stored in T[30] and left on the float stack.
: MFLOPS(2)     ( F: -- mflops )
  T[2] T[9] F+ T[12] F+ T[15] F+ T[18] F+ T[21] 4e F* F+ 152e F/
  FDUP [FTO] T[29] 1/F FDUP [FTO] T[30] ;

\ *************************************************
\ MFLOPS(3) output. This output does not include
\ Module 2, but it still does 3.4% FDIV's.
\ *************************************************

\ Time per operation over modules 1 and 3 to 8: the sum of their run
\ times divided by 146, kept in T[31].  The reciprocal is kept in T[32]
\ and returned on the float stack.
: MFLOPS(3)     ( F: -- mflops )
  T[2] T[9] F+ T[12] F+ T[15] F+ T[18] F+ T[21] F+ T[24] F+ 146e F/
  FDUP [FTO] T[31] 1/F FDUP [FTO] T[32] ;

\ *************************************************
\ MFLOPS(4) output. This output does not include
\ Module 2, and it does NO FDIV's.
\ *************************************************

\ Time per operation over modules 3, 4, 6 and 8: the sum of their run
\ times divided by 91, kept in T[33].  The reciprocal is kept in T[34]
\ and returned on the float stack.
: MFLOPS(4)     ( F: -- mflops )
  T[9] T[12] F+ T[18] F+ T[24] F+ 91e F/
  FDUP [FTO] T[33] 1/F FDUP [FTO] T[34] ;

\ --- MAIN, split into its phases ---------------------------------

\ Title line of the report.
: .FLOPS-BANNER         ( -- )
  CR ." FLOPS Forth Program (Double Precision), V2.0 14 Jan 2001" CR ;

\ Put the benchmark state back to its starting values so that MAIN can
\ be run repeatedly: SCALE and M from LOOPS, and the positive A3 and A5
\ that MODULE-4 negates.
: FLOPS-INIT            ( -- )
  1e6 loops S>F F/ [FTO] scale
  loops TO m
  0.198412715551283e-3 [FTO] A3
  0.2507059876207e-7 [FTO] A5 ;

\ Column headings above the per-module rows.
: .FLOPS-COLUMNS        ( -- )
  CR ." Module Error RunTime MFLOPS"
  CR ." (usec)"
  CR ;

\ Run the eight modules in order; each prints its own result row.
: RUN-MODULES           ( -- )
  MODULE-1 MODULE-2 MODULE-3 MODULE-4
  MODULE-5 MODULE-6 MODULE-7 MODULE-8 ;

\ Closing summary: iteration count, null time and the four ratings.
: .FLOPS-SUMMARY        ( -- )
  CR
  CR ." Iterations = " m EOL 10 .R
  CR ." NullTime (usec) = " nulltime EOL FE.
  CR ." MFLOPS(1) = " MFLOPS(1) EOL FE.
  CR ." MFLOPS(2) = " MFLOPS(2) EOL FE.
  CR ." MFLOPS(3) = " MFLOPS(3) EOL FE.
  CR ." MFLOPS(4) = " MFLOPS(4) EOL FE.
  CR ;

\ Entry point.  Runs the whole benchmark with the float output
\ precision set to 4, and restores the caller's precision afterwards.
: MAIN          ( -- )
  PRECISION >R  4 SET-PRECISION
  .FLOPS-BANNER
  FLOPS-INIT
  .FLOPS-COLUMNS
  RUN-MODULES
  .FLOPS-SUMMARY
  R> SET-PRECISION ;

0 [IF]
( P54c )
FORTH> main
FLOPS Forth Program (Double Precision), V2.0 14 Jan 2001

Module     Error         RunTime      MFLOPS
                          (usec)
  1      4.4764E-13       0.4775      29.3194
  2     -6.9400E-14       0.3472      20.1584
  3     -8.7708E-15       0.3845      44.2133
  4      4.2299E-14       0.3290      45.5927
  5      2.4980E-14       0.7865      36.8722
  6     -2.8311E-15       0.6035      48.0530
  7     -5.1273E-11       0.8840      13.5747
  8      3.0198E-14       0.5985      50.1253

Iterations      =    4001600
NullTime (usec) =     0.0365
MFLOPS(1)       =    24.5196
MFLOPS(2)       =    24.8488
MFLOPS(3)       =    35.9296
MFLOPS(4)       =    47.5072

( PII - 350 MHz ) ---------------------------------------------
FLOPS Forth Program (Double Precision), V2.0 14 Jan 2001

Module     Error         RunTime      MFLOPS
                          (usec)
  1      2.5935E-13       0.1568      89.3142
  2      1.6160E-13       0.0729      96.0549
  3     -1.9445E-14       0.1365     124.5421
  4     -1.4575E-13       0.1266     118.5185
  5     -1.1170E-13       0.2874     100.8915
  6     -1.3711E-15       0.2406     120.5195
  7     -6.2357E-11       0.3480      34.4828
  8     -1.2270E-14       0.2268     132.3043

Iterations      =   16006400
NullTime (usec) =     0.0114
MFLOPS(1)       =   103.8183
MFLOPS(2)       =    64.9607
MFLOPS(3)       =    95.8870
MFLOPS(4)       =   124.5829

( Athlon 900 MHz ) ------------------------------------------
FLOPS Forth Program (Double Precision), V2.0 14 Jan 2001

Module     Error         RunTime      MFLOPS
                          (usec)
  1      4.8317E-13       0.0260     538.4615
  2      1.8135E-13       0.0150     466.1811
  3     -9.6647E-15       0.0394     431.9174
  4      1.5201E-13       0.0263     570.7491
  5     -1.2225E-13       0.0498     582.0006
  6     -6.9949E-15       0.0454     638.4589
  7     -1.1880E-11       0.0782     153.5386
  8     -8.7737E-14       0.0452     662.9834

Iterations      =   64025600
NullTime (usec) =     0.0073
MFLOPS(1)       =   454.3965
MFLOPS(2)       =   304.2948
MFLOPS(3)       =   470.5171
MFLOPS(4)       =   582.1671

( lcc on p54c ) --------------------------------------------
FLOPS C Program (Double Precision), V2.0 18 Dec 1992

Module     Error         RunTime      MFLOPS
                          (usec)
  1     -4.6896e-013      0.8074      17.3388
  2      2.2160e-013      0.6187      11.3148
  3     -6.9944e-015      0.8015      21.2102
  4     -9.7256e-014      0.5705      26.2913
  5     -1.6542e-014      1.2869      22.5352
  6      4.3632e-014      1.1051      26.2429
  7     -4.9454e-011      1.3232       9.0692
  8      7.2164e-014      1.2262      24.4661

Iterations      =   32000000
NullTime (usec) =     0.0181
MFLOPS(1)       =    13.3512
MFLOPS(2)       =    15.4095
MFLOPS(3)       =    20.5035
MFLOPS(4)       =    24.5728
"c:\lcc\src\flops\lcc\flops.exe"
Return code 33

( ms vc++ 6.0 on Athlon 900 MHz )
FLOPS C Program (Double Precision), V2.0 18 Dec 1992

Module     Error         RunTime      MFLOPS
                          (usec)
  1      1.3358e-012      0.0311     449.5735
  2      2.0517e-013      0.0188     373.3333
  3      1.7542e-014      0.0423     402.2181
  4     -5.4512e-014      0.0340     441.7855
  5      3.3307e-016      0.0631     459.7473
  6     -1.9040e-014      0.0617     470.3497
  7      2.6034e-011      0.0521     230.2849
  8     -5.4068e-014      0.0617     486.5687

Iterations      =   64000000
NullTime (usec) =     0.0052
MFLOPS(1)       =   382.3090
MFLOPS(2)       =   345.0380
MFLOPS(3)       =   422.1369
MFLOPS(4)       =   456.0689
[THEN]

\ Load-time hint telling the user how to start the benchmark; it is
\ printed once when this file has been compiled.
: .ABOUT        ( -- )
  CR ." Try: MAIN " ;

.ABOUT

( * End of Source * )