# Document Title: example-advanced.cfg
# Subject: SPEC CPU2000 Example Config file
# Last updated: 29-Oct-2001 jh
# ------------------------------------------------------------------------
# This is an example CPU2000 config file. It attempts to illustrate
# a number of more advanced features of the CPU2000 tools, including:
#
#     basepeak
#     feedback-directed optimization
#     multiple CPUs during a build
#     multiple extensions for different purposes
#     multiple Fortran compilers (both f77 and f90)
#     multiple hardware architectures
#     multiple operating system versions
#     variable substitution
#
################################################################
# HP Spec CPU 2000 Config file #
# Version: CPU2000 Feature Testing Nov 1999 #
# Author: Ryan Border, Hewlett Packard Company #
# Additional Annotation: Cloyce Spradling, John Henning #
################################################################
# ( It is possible to have one config file that builds many )
# ( different kinds of binaries. This is done using the )
# ( "--extension" feature on the runspec command line (-e for )
# ( short) plus the third of the four section specifiers )
# ( (benchmark=tuning=extension=machine:). )
#
# At the runspec command line, select extension (-e) as:
#     FAST:  Best possible optimization (result submissions)
#     O2:    (that's "Oh2", not "zero2") +O2 optimization
#     O3:    +O3 optimization
#     GPROF: +O2 -G; instrumented for profiling
#     DEBUG: +O0 -g; optimizations off, debug

# Other switches that need to be set follow - edit this config file
# before issuing the runspec command, often by just removing or adding
# comment marks. But there are some exceptions where you will have to
# fill in more detail - see especially the remarks about "notes", below.
#####################################################################
# Target selection #
#####################################################################
# Set OS_LEVEL to one of the following:
#OS_LEVEL = HPUX_1020
OS_LEVEL = HPUX_1100

# Select architecture compilation mode
#CMODE = PA11
CMODE = PA20_32
#CMODE = PA20_64

# Select if you want f77 or f90 to be used for peak f77 codes
# (For baseline, all codes are compiled using f90, as required
# by the run rules)
F77_PEAK = FORTRAN77
#F77_PEAK = FORTRAN90

# Select if you want archive or shared libraries:
#LIB_MODE = SHARED
LIB_MODE = ARCHIVE

#####################################################################
# runspec controls #
#####################################################################
# ( Remember, most runspec controls need to be in the header )
# ( section - that is, prior to the first instance of a section )
# ( marker. For more information, see config.html on config file )
# ( structure. )

# Set makeflags=-jN where N is the number of compile tasks
# that you'd like to start up concurrently
# This is probably the number of CPUs
makeflags = -j1

# teeout and teerunout simply make runspec a lot more verbose
teeout = yes
teerunout = yes

# Set this to zero to disable md5 checking, which normally makes sure
# the binary to be used matches this config file. md5 checking must be
# enabled for official submissions.
# check_md5 = 0

# Set these to minimize disk space use. These also can't be used in an
# official submission.
#minimize_builddirs = yes
#minimize_rundirs = yes

# This lets us use some variables in the notes:
expand_notes = 1
# ( To see which variables you can use in your notes, set the )
# ( output level of the tools to 99 (using the --verbose command )
# ( line flag) and look for lines that begin with 'Setting' in the )
# ( log file of the resultant run. Some examples are already in )
# ( config.html, though that's not an exhaustive list. The only )
# ( variables that can be expanded in notes must be set directly; )
# ( that is, if you set A=1, and B=$A, you can't use $B in your )
# ( notes and expect to see a '1'. If you use $A in your notes, )
# ( though, you should see a '1'. Additionally, most variables )
# ( that are set with a default value and a benchmark-specific )
# ( value (like OPTIMIZE, for example) probably won't come out )
# ( with the values that you expect. In general, this feature is )
# ( limited to variables that runspec handles, and does not reflect )
# ( variables interpreted by specmake. Try some experiments before )
# ( relying on this feature! )

#####################################################################
# Compiler architecture selection #
#####################################################################
# Let's be very explicit about which architecture we're compiling on/for.
# This should all be automatic, but is sometimes dysfunctional on early
# pre-ship prototype systems. If you use this config file for a submission
# you must either document the +DA +DS in the notes section, OR assert
# to SPEC that the final product will set these switches automatically.
# See run rule 4.2.4.
default=default=default=default:
# One compiler command per architecture mode; the un-suffixed variable
# (CC, CXX, F90) picks the entry whose suffix matches CMODE.
CC_PA11 = /opt/ansic/bin/cc -Ae +DA1.1 +DS1.1
CC_PA20_32 = /opt/ansic/bin/cc -Ae +DA2.0 +DS2.0
CC_PA20_64 = /opt/ansic/bin/cc -Ae +DA2.0W +DS2.0 +DD64 -DSPEC_CPU2000_LP64
CC = $(CC_$(CMODE))

CXX_PA11 = /opt/aCC/bin/aCC +DA1.1 +DS1.1
CXX_PA20_32 = /opt/aCC/bin/aCC +DA2.0 +DS2.0
CXX_PA20_64 = /opt/aCC/bin/aCC +DA2.0W +DS2.0 -DSPEC_CPU2000_LP64
CXX = $(CXX_$(CMODE))

F90_PA11 = /opt/fortran90/bin/f90 +DA1.1 +DS1.1
F90_PA20_32 = /opt/fortran90/bin/f90 +DA2.0 +DS2.0
F90_PA20_64 = /opt/fortran90/bin/f90 +DA2.0W +DS2.0
F90 = $(F90_$(CMODE))
FC = $(F90)

F77_PA11 = /opt/fortran/bin/f77 +DA1.1 +DS1.1
F77_PA20_32 = /opt/fortran/bin/f77 +DA2.0 +DS2.0
# No 64bit mode is available for f77 compiler
F77_PA20_64 = $(F77_PA20_32)

#####################################################################
# Link-time selections
#####################################################################
default=default=default=default:
# Fastmem module available only for PA2.0 32bit
FASTMEM_PA11 =
FASTMEM_PA20_32 = /opt/langtools/lib/fastmem.o
FASTMEM_PA20_64 =

# Don't attempt to use +ESfic with shared libraries:
ESFIC_ARCHIVE = +ESfic
ESFIC_SHARED =

# Library selection. Use large TLB pages on 11.0 systems.
# 11.0 Page size can be selected here, or with kernel tunes.
# Check your baseline flag count if you decide to do it here
# (only 4 flags are allowed in baseline)
#LIBS_ARCHIVE_HPUX_1100 = -Wl,-aarchive -Wl,+pd,256k
LIBS_ARCHIVE_HPUX_1100 = -Wl,-aarchive
#LIBS_SHARED_HPUX_1100 = -Wl,+pd,256
LIBS_SHARED_HPUX_1100 =

# 10.20 lib flags (page size not tunable on 10.20)
LIBS_ARCHIVE_HPUX_1020 = -Wl,-aarchive
LIBS_SHARED_HPUX_1020 =

# The library flag set is looked up by both LIB_MODE and OS_LEVEL.
LIBS = $(LIBS_$(LIB_MODE)_$(OS_LEVEL))

# ( Notes about variable substitution (as exemplified by the above )
# ( LIBS line): )
# ( )
# ( Expansion of variables to construct the name of a variable )
# ( works only for variables used in the Makefile. specmake does )
# ( this expansion, and the main tools have no knowledge of it or )
# ( its expansion. As such, it's fine to use (and will work), but )
# ( can't be interpolated into fields that only the main tools )
# ( deal with, such as notes. )
# ( )
# ( To view the contents of the Makefile generated for a given )
# ( run, you can )
# ( )
# ( 1) set the output level on the main tools to 50 or higher )
# (    (using the --verbose command line switch). The expansions )
# (    that can be performed by the main tools are done by that stage. )
# ( )
# ( or )
# ( )
# ( 2) Find the run directory, and examine what happened. For )
# (    example (using the very config file you are reading now): )
# ( )
# (    % grep build $SPEC/benchspec/CINT2000/164.gzip/run/list \ )
# (        | grep FAST | cut -b 1-9 )
# (    00000008 )
# (    % cd $SPEC/benchspec/CINT2000/164.gzip/run/00000008 )
# ( )
# ( Once there, you can look at Makefile.spec, and some of the )
# ( other files generated for your build, such as: )
# ( )
# (    options.out - build options summary for 1 pass compile )
# (    options1.out - For N pass compile, summary of first pass )
# (    options2.out - For N pass compile, summary of second pass )
# (    make.out - detailed commands generated for 1 pass )
# (        compile )
# (    fdo_make_pass1.out - For N pass compile, detailed commands )
# (        generated for 1st pass )
# (    fdo_make_pass2.out - For N pass compile, detailed commands )
# (        generated for 2nd pass )
# ( )
# ( Let's walk through some of these files in the run directory )
# ( for 176.gcc as compiled with runspec -e FAST -T peak, using )
# ( this config file as shipped (i.e. no changes within the config )
# ( file to set any other operating system or hardware types etc.) )
# ( )
# (    $ grep -e '^LIBS ' -e '^LIB_MODE ' -e '^CMODE ' \ )
# (    > -e '^OS_LEVEL ' Makefile.spec )
# (    CMODE = PA20_32 )
# (    LIBS = $(FASTMEM_$(CMODE)) $(LIBS_$(LIB_MODE)_$(OS_LEVEL)) )
# (    LIB_MODE = ARCHIVE )
# (    OS_LEVEL = HPUX_1100 )
# ( )
# ( OK, as this config file is shipped, it causes Makefile.spec to )
# ( say that we are using the PA-RISC V2.0 architecture in 32 bit )
# ( mode with archive libraries on HP-UX V11.00. And: )
# ( )
# (    $ grep -e '^FASTMEM_PA20_32 ' -e '^LIBS_ARCHIVE_HPUX_1100' \ )
# (    > Makefile.spec )
# (    FASTMEM_PA20_32 = /opt/langtools/lib/fastmem.o )
# (    LIBS_ARCHIVE_HPUX_1100 = -Wl,-aarchive )
# ( )
# ( So we should now have enough clues to figure out that LIBS )
# ( should actually be set to: )
# (    "/opt/langtools/lib/fastmem.o -Wl,-aarchive" )
# ( Is it? )
# ( )
# (    $ grep LINK options2.out | fold -s -w50 )
# (    echo "LINK: /opt/ansic/bin/cc -Ae +DA2.0 +DS2.0 )
# (    +P +O4 +Ostaticprediction +ESlit +ESfic )
# (    /opt/langtools/lib/fastmem.o -Wl,-aarchive -lm )
# (    -o options" )
# ( )
# ( According to the summary, apparently yes! And if we look in )
# ( the actual commands fed to the linker? )
# ( )
# (    % tail -1 fdo_make_pass1.out | fold -s -w60 | tail -3 )
# (    insn-emit.o insn-attrtab.o m88k.o getpwd.o convert.o )
# (    bc-emit.o bc-optab.o obstack.o )
# (    /opt/langtools/lib/fastmem.o -Wl,-aarchive -lm -o cc1 )
# ( )
# ( Yes once more. )

#####################################################################
# f77 compiler selection #
#####################################################################
# Fortran compiler/switch selections:
# Must use f90 for all f77/f90 codes in base.
# Should be automatic, but let's be sure.
default=base=default=default:
F77 = $(F90)
F77OPTIMIZE = $(FOPTIMIZE)

# But, we have our choice for peak:
default=peak=default=default:
F77_FORTRAN77 = $(F77_$(CMODE))
F77_FORTRAN90 = $(F90_$(CMODE))
# F77_PEAK chooses between the two candidates defined just above.
F77 = $(F77_$(F77_PEAK))

# Set basepeak to yes (default). This will cause runspec to use the same
# executable for both base and peak runs. But for some benchmarks
# we may choose to override this default - will unset for benchmarks
# with special peak performance flags or compilers.
default=default=default=default:
basepeak = yes

# Above, basepeak was just set to "yes" for all the codes.
# If you want to use the same tuning for both base and
# peak, but you want to use a separate f77 compiler for peak, unset
# it for the f77 codes. This must be done individually for each
# of the f77 benchmarks. Uncomment the sections below as appropriate:
#168.wupwise=default=default=default:
#basepeak = no
#171.swim=default=default=default:
#basepeak = no
#172.mgrid=default=default=default:
#basepeak = no
#173.applu=default=default=default:
#basepeak = no
#190.gafort=default=default=default:
#basepeak = no
#200.sixtrack=default=default=default:
#basepeak = no
#301.apsi=default=default=default:
#basepeak = no

#####################################################################
# +O2 Optimization (portability testing) #
#####################################################################
default=default=O2=default:
FOPTIMIZE = +O2
F77OPTIMIZE = +O2
COPTIMIZE = +O2
CXXOPTIMIZE = +O2
notes000 = +O2 Optimization

#####################################################################
# +O3 Optimization #
#####################################################################
default=default=O3=default:
FOPTIMIZE = +O3
F77OPTIMIZE = +O3
COPTIMIZE = +O3
CXXOPTIMIZE = +O3
notes000 = +O3 Optimization

#####################################################################
# gprof profiling, +O2 Optimization. #
# LIB_MODE is ignored, this tuning will use shared libraries.
#
#####################################################################
default=default=GPROF=default:
# Replaces the default LIBS lookup so the shared-library flag set is
# used no matter what LIB_MODE is set to.
LIBS = $(LIBS_SHARED_$(OS_LEVEL))
FOPTIMIZE = +O2 -G
F77OPTIMIZE = +O2 -G
COPTIMIZE = +O2 -G
CXXOPTIMIZE = +O2 -G
notes000 = +O2 shared library optimization
notes001 = Gprof profiling enabled

#####################################################################
# Debug #
# LIB_MODE is ignored, this tuning will use shared libraries. #
#####################################################################
default=default=DEBUG=default:
# Same LIBS override as the GPROF tuning: shared-library flags only.
LIBS = $(LIBS_SHARED_$(OS_LEVEL))
FOPTIMIZE = +O0 -g
F77OPTIMIZE = +O0 -g
COPTIMIZE = +O0 -g
CXXOPTIMIZE = +O0 -g
notes000 = +O0 -g (debug compile)
notes001 = Shared libraries.

#####################################################################
# "FAST" Optimization - best performance #
#####################################################################
# ( Note that in order to perform feedback directed optimization, all we )
# ( have to do is mention PASSn and the tools automatically invoke the )
# ( compiler twice, with a training run in between. All this happens in )
# ( the run directories, so the original sources remain untouched. )
default=base=FAST=default:
COPTIMIZE = +Oall +Onovectorize
# PASS1 flags (+I) apply to the first compile, PASS2 flags (+P) to the
# second; the link step of each pass reuses that pass's compile flags.
PASS1_CFLAGS = +I
PASS1_LDCFLAGS = $(PASS1_CFLAGS)
PASS2_CFLAGS = +P
PASS2_LDCFLAGS = $(PASS2_CFLAGS)
CXXOPTIMIZE = +O2
FOPTIMIZE = +O3 +Odataprefetch +Onofltacc

default=peak=FAST=default:
F77OPTIMIZE_FORTRAN90 = $(FOPTIMIZE)
F77OPTIMIZE_FORTRAN77 = +O4 +Odataprefetch +Onofltacc
# F77_PEAK decides which of the two variants above becomes F77OPTIMIZE.
F77OPTIMIZE = $(F77OPTIMIZE_$(F77_PEAK))

# The notes for "FAST" tuning will definitely need to be adjusted
# if you change any of the various variables in this file! Think
# it through. Check what really happened, by looking in the log
# file and/or options.out.
# Notes common to every benchmark built with the FAST extension.
default=default=FAST=default:
notes001 = Base Flags:
notes003 = Baseline linker flag, All: -Wl,-aarchive
notes004 =
notes005 = Peak Flags same as base except as noted:
notes498 = Peak linker flag, All: -Wl,-aarchive
notes499 = The +I/+P denotes the use of profile based optimization

int=default=FAST=default:
notes002 = C: +Oall +Onovectorize +I/+P, C++: +O2

fp=default=FAST=default:
# The f90 flags listed here must match FOPTIMIZE in the base FAST section
# (+O3 +Odataprefetch +Onofltacc).
notes002 = C: +Oall +Onovectorize +I/+P, f90: +O3 +Odataprefetch +Onofltacc
notes006 = 168, 171, 172, 173, 190, 200, 301: Fortran77 compiler used.

# Peak flags for "FAST" tuning follow
# Note that for FAST we definitely want to disable basepeak for
# certain benchmarks, because the tuning is different.
171.swim=default=FAST=default:
basepeak = no
171.swim=peak=FAST=default:
F77OPTIMIZE_FORTRAN77 = +O4 +Oloop_unroll +Odataprefetch +Ocachepadcommon
F77OPTIMIZE_FORTRAN90 = +O3 +Oloop_unroll +Odataprefetch +Ocachepadcommon
F77OPTIMIZE = $(F77OPTIMIZE_$(F77_PEAK))
notes171 = 171: +O4 +Oloop_unroll +Odataprefetch +Ocachepadcommon

172.mgrid=default=FAST=default:
basepeak = no
172.mgrid=peak=FAST=default:
F77OPTIMIZE = +O3 +Oloop_unroll +Odataprefetch +Onolimit +Onofltacc +Oinline_budget=1000000
notes172 = 172: +O3 +Oloop_unroll +Odataprefetch +Onolimit +Onofltacc +Oinline_budget=1000000

173.applu=default=FAST=default:
basepeak = no
173.applu=peak=FAST=default:
F77OPTIMIZE = +O3 +Oloop_unroll +Odataprefetch +Onofltacc
notes173 = 173: +O3 +Oloop_unroll +Odataprefetch +Onofltacc

176.gcc=default=FAST=default:
basepeak = no
176.gcc=peak=FAST=default:
# The nested interpolation required for the COPTIMIZE line is fine and will
# work, but means that we can't use it verbatim in the notes. :(
COPTIMIZE = +O4 +Ostaticprediction +ESlit $(ESFIC_$(LIB_MODE)) +Onolimit
PASS1_CFLAGS = +I
PASS1_LDCFLAGS = $(PASS1_CFLAGS)
PASS2_CFLAGS = +P
PASS2_LDCFLAGS = $(PASS2_CFLAGS)
# As above for LIBS
LIBS = $(FASTMEM_$(CMODE)) $(LIBS_$(LIB_MODE)_$(OS_LEVEL))
# In a more perfect world (with better tools), the following line would
# suffice:
#notes176 = 176: $(COPTIMIZE) $(LIBS) +I/+P
# Alas, because both of those values depend on variables whose names are
# made of other variables' values, the main tools can't interpolate them
# properly. As a result, you'll need to do the interpolation yourself
# and fill in the notes field by hand.
# This is not necessarily as bad as it sounds, as this way you can do
# proper line wrapping (another thing that the tools can't do for you)
notes176 = 176: +O4 +Ostaticprediction +ESlit +ESfic +Onolimit
notes176_1 = /opt/langtools/lib/fastmem.o -Wl,-aarchive +I/+P

191.fma3d=default=FAST=default:
basepeak = no
191.fma3d=peak=FAST=default:
ONESTEP = yes
FOPTIMIZE = +O3 +Oaggressive
notes191 = 191: +O3 +Oaggressive ONESTEP=yes

253.perlbmk=default=FAST=default:
basepeak = no
253.perlbmk=peak=FAST=default:
COPTIMIZE = +O4 +ESlit $(ESFIC_$(LIB_MODE)) +Onolimit +Olibcalls +Owhole_program_mode
PASS1_CFLAGS = +I
PASS1_LDCFLAGS = $(PASS1_CFLAGS)
PASS2_CFLAGS = +P
PASS2_LDCFLAGS = $(PASS2_CFLAGS)
LIBS = $(FASTMEM_$(CMODE)) $(LIBS_$(LIB_MODE)_$(OS_LEVEL))
# Likewise here as with the notes for 176.gcc
notes253 = 253: +O4 +ESlit +ESfic +Onolimit
notes253_1 = +Olibcalls +Owhole_program_mode
notes253_2 = /opt/langtools/lib/fastmem.o -Wl,-aarchive +I/+P

301.apsi=default=FAST=default:
basepeak = no
301.apsi=peak=FAST=default:
F77OPTIMIZE_FORTRAN77 = +Oall
F77OPTIMIZE_FORTRAN90 = +O3
F77OPTIMIZE = $(F77OPTIMIZE_$(F77_PEAK))
# Here's a slightly happier case (relative to 176.gcc and 253.perlbmk);
# just uncomment the appropriate line depending on whether or not you're
# using an F90 compiler for peak.
# As shipped, F77_PEAK = FORTRAN77, so F77OPTIMIZE resolves to +Oall and
# the +Oall note is the active one. Swap the two lines below if you set
# F77_PEAK = FORTRAN90.
notes301 = 301: +Oall
#notes301 = 301: +O3
########################################################################
# Benchmark specific portability flags. #
########################################################################
# Check portability notes carefully when any switches are changed.
default=default=default=default:
notes500 =
notes501 = Benchmark Specific Portability Flags:

# Wupwise requires static variable declaration, syntax for this depends
# on what compiler we've chosen.
168.wupwise=default=default=default:
FPORTABILITY = +save
168.wupwise=peak=default=default:
F77PORTABILITY_FORTRAN77 = -K
F77PORTABILITY_FORTRAN90 = $(FPORTABILITY)
# F77_PEAK picks which of the two spellings above is used for peak.
F77PORTABILITY = $(F77PORTABILITY_$(F77_PEAK))
168.wupwise=default=default=default:
notes668 = 168: +save

176.gcc=default=default=default:
CPORTABILITY = -DHOST_WORDS_BIG_ENDIAN +Olibcalls
# This note depends on the tuning selected, +Olibcalls is only required at low optimization
# NOTE(review): the note below omits +Olibcalls although the CPORTABILITY
# line in this section sets it - confirm whether that is intended.
notes676 = 176: -DHOST_WORDS_BIG_ENDIAN
176.gcc=default=FAST=default:
CPORTABILITY = -DHOST_WORDS_BIG_ENDIAN
notes676 = 176: -DHOST_WORDS_BIG_ENDIAN

178.galgel=default=default=default:
FPORTABILITY = +source=fixed
notes678 = 178: +source=fixed

# This big mess does nothing more than allow parallel compiles,
# getting the f90 module build order right.
vendor_makefile=<