# Invocation command line:
# /home/cpu2017/bin/harness/runcpu --configfile amd1704na-rate-revC.cfg --tune base --nopower --runmode rate --tune base --size test:train:refrate fprate
# output_root was not used for this run
############################################################################
################################################################################
# AMD1704 SPEC CPU2017 V1.0.2 Rate Configuration File for 64-bit Linux
#
#      File name                : amd1704na-rate-revC.cfg
#      Creation Date            : July 31, 2017
#      CPU2017 Version          : 1.0.2
#      Supported benchmarks     : All Rate benchmarks (intrate, fprate)
#      Compiler name/version    : AOCC/1.0.0
#      Operating system version : Ubuntu 16.04
#      Supported OS's           : Ubuntu 16.04/17.04, RHEL 7.4
#      Hardware                 : AMD Naples (AMD64)
#      FP Base Pointer Size     : 64-bit
#      FP Peak Pointer Size     : 32/64-bit
#      INT Base Pointer Size    : 64-bit
#      INT Peak Pointer Size    : 32/64-bit
#      Auto Parallelization     : No
#
# Note: DO NOT EDIT THIS FILE, the only edits required to properly run these
# binaries are made in AMD1704na-rate-revC.inc.  Please consult Readme.amd1704na_revC.txt
# for a few uncommon exceptions which require edits to this file.
#
# Description:
# This binary package automates away many of the complexities necessary to set
# up and run SPEC CPU2017 under optimized conditions on AMD Naples-based
# server platforms within Linux (AMD64).
#
# The binary package was built specifically for AMD Naples microprocessors and
# is not intended to run on other products.
#
# Please install the binary package by following the instructions in
# "Readme.amd1704na_revC.txt" under the "How To Use the Binaries" section.
#
# The binary package is designed to work without alteration on two socket AMD
# Naples-based servers with 32 cores per socket, SMT enabled and 512 GiB of DDR4
# memory distributed evenly among all 16 channels using 16 GiB DIMMs.
#
# To run the binary package on other Naples configurations, please review
# "Readme.amd1704na_revC.txt" for instructions on how to easily modify
# the CPU2017 configuration include file "AMD1704na-rate-revC.inc".  Typically,
# the number of sockets, number of cores per socket, SMT state and memory
# size values will need to be changed in amd1704na-rate-revX.inc.  Furthermore,
# modify necessary system specific and test specific documentation within this
# file.  Again, refer to Readme.amd1704na-rate-revX.txt for more information.
#
# In most cases, it should be unnecessary to edit "AMD1704na-rate-revC.cfg" or any
# other file besides "AMD1704na-rate-revC.inc".
#
# The config file automatically sets the number of rate copies equal to the
# number of logical processors and automatically binds each copy to a unique
# logical core.
#
# The run script and accompanying binary package are designed to work on Ubuntu
# 17.04/16.04 and RHEL 7.4.
#
# Important! If you write your own run script, please set the stack size to
# "unlimited" when executing this binary package.  Failure to do so may cause
# some benchmarks to overflow the stack.  For example, to set stack size within
# the bash shell, include the following line somewhere at the top of your run
# script before the runcpu invocation:
#
#      ulimit -s unlimited
#
# Modification of this config file should only be necessary if you intend to
# rebuild the binaries.  General instructions for rebuilding the binaries are
# found in-line below.
#
################################################################################
# Binary label extension and "allow_build" switch
################################################################################
# Only modify the binary label extension if you plan to rebuild the binaries.
# "ext" is the value assigned to "label" in the Header settings section.
%define ext amd1704-rate-revC

# If you plan to recompile these CPU2017 binaries, please choose a new extension
# name (ext above) to avoid confusion with the current binary set on your system
# under test, and to avoid confusion for SPEC submission reviewers.  You will
# also need to set "allow_build" to true below. Finally, you must modify the
# Paths section below to point to your library locations if the paths are not
# already set up in your build environment.

# Change the following line to true if you intend to REBUILD the binaries (AMD
# does not support this). Valid values are "true" or "false" (no quotes).
# The Paths section compares this value as a string: "false" sets
# fail_build = 1, "true" sets fail_build = 0 plus makeflags, and any other
# value raises a preprocessor %error.
%define allow_build false
#
################################################################################
# Paths -- MODIFY AS NEEDED (modification should not be necessary for runs)
################################################################################
# Set location of runtime libraries for runs or builds.
preenv                   = 1
%define lib_dir amd1704-rate-libs-revC
# The following path will have to be changed on the build system if different.
# It is referenced as $[JEMALLOC_LIB32_PATH] by the 32-bit peak builds:
JEMALLOC_LIB32_PATH      = /root/work/lib/jemalloc/lib32
%if '%{allow_build}' eq 'false'
  # Run-only mode: refuse to build and point the loader at the shipped
  # 64-bit and 32-bit runtime library directories.
  fail_build             = 1
  # Runtime libraries (all entries are separated with ":" so the search path
  # does not depend on the loader also accepting ";" as a separator):
  preENV_LD_LIBRARY_PATH   = $[top]/%{lib_dir}/64:$[top]/%{lib_dir}/32:%{ENV_LD_LIBRARY_PATH}
%elif '%{allow_build}' eq 'true'
  # If you intend to rebuild, be sure to set the library paths either in the
  # build script or here:
% define build_ncpus 16    # controls number of simultaneous compiles
  fail_build             = 0
  makeflags              = --jobs=%{build_ncpus} --load-average=%{build_ncpus}
%else
% error The value of "allow_build" is %{allow_build}, but it can only be "true" or "false". This error was generated
%endif

################################################################################
# Header settings
################################################################################
# Backup files are produced unless this is set to 0:
backup_config = 1
bench_post_setup = sync
# command_add_redirect: when set, the generated ${command} itself carries the
# stdout/stderr redirection operators and the executing shell applies them;
# when unset, specinvoke performs the redirection. NOTE: the affinity
# generation section of this config file requires the value "yes".
command_add_redirect = yes
check_md5 = yes
env_vars = no
# Flag description files (gcc, AOCC and HPE platform flags, in that order):
flagsurl000 = http://www.spec.org/cpu2017/flags/gcc.2017-11-20.xml
flagsurl001 = http://www.spec.org/cpu2017/flags/aocc100-flags-revC-I.xml
flagsurl002 = http://www.spec.org/cpu2017/flags/HPE-Platform-Flags-AMD-V1.2-EPYC-revD.xml
#flagsurl03 = $[top]/amd1704-INVALID-platform-revC-I.xml
ignore_errors = no
# label: arbitrary string that tags your binaries and directories.
label = %{ext}
line_width = 1020
log_line_width = 1020
mean_anyway = yes
output_format = all
reportable = yes
size = test,train,ref
teeout = yes
teerunout = yes
tune = base,peak

################################################################################
# Include file name
################################################################################
# The include file contains fields that are commonly changed. This is usually
# the only file that needs modification for runs.
# "inc_file_name" is referenced by the %error messages further down.  The
# include directive itself is commented out here because the contents of the
# include file appear inline between the "Begin inclusion" and "End inclusion"
# markers.
%define inc_file_name AMD1704na-rate-revC.inc
#include: %{inc_file_name}
#  ----- Begin inclusion of 'AMD1704na-rate-revC.inc'
############################################################################
################################################################################
# Machine Specific Settings
################################################################################
# This binary package is designed to work without alteration on two socket AMD
# Naples-based servers with 32 cores per socket, SMT enabled and 512 GiB of DDR4
# memory distributed evenly among all 16 channels using 16 GiB DIMMs.
#
# To run the binary package on other Naples configurations, please review
# "Readme.amd1704na_revC.txt" for instructions on how to easily modify
# this include file ("AMD1704na-rate-revC.inc").  Typically, the number of sockets,
# number of cores and NUMA nodes per socket, SMT state and memory size values
# will need to be changed in this file.  Furthermore, modify necessary system
# specific and test specific documentation within this file.  Again, refer to
# "Readme.amd1704na-rate-revX.txt" for more information.
#
# Note that the submit command is written to dynamically generate bash scripts
# which assume that each socket has the same number of nodes and that each node
# services the same number of logical cores and that both logical cores and NUMA
# nodes are enumerated sequentially.  If these assumptions are not correct for
# your system, process affinity for each benchmark copy might not get set
# correctly, which can lead to suboptimal results or even run errors.

################################################################################
# ****************************** Required fields *******************************
# The following fields must be updated to reflect SUT configuration for
# functional purposes. If these fields are incorrect, CPU2017 might not run or
# might run suboptimally.
################################################################################

# How many sockets does your test system have? (Naples supports 1 or 2)
%define num_sockets 2

# How many physical cores per socket does your system have?
# (1, 2, 4, 8, 16, 24 or 32)
%define cores_per_socket 24

# All Naples parts have four NUMA nodes per socket; the core count per socket
# is 8, 16, 24 or 32. Ryzen 7 parts have 1 node and 8 cores.
%define nodes_per_socket 4

# Is SMT enabled for the upcoming SPEC CPU2017 run? (disabled or enabled)
# Setting enabled will account for and configure two cpu2017 copies per
# physical core.
%define smt enabled

# Approximately how much memory (in GiB) do you have in your system?
# Typical values are 8, 16, 32, 64, 128, 256 or 512.  If your SUT's memory
# capacity is between two of those values then round down.  If this value is
# over 512 GiB, choose 512.
#
# Update the line below if your SUT's memory capacity is below 512 GiB:
# NOTE(review): the value below (1024) is above the 512 GiB cap described
# above.  The hw_memory000 text used when the rated and actual memory
# frequencies match is a literal "1 TB" string rather than being derived from
# this macro -- confirm the intended value before reusing this file.
%define memory_size 1024

################################################################################
# ****************************** Optional fields *******************************
# The following optional fields can be updated to simplify memory documentation.
# Alternatively, you can edit the fields hw_memory000 and hw_memory001 as
# required in the documentation section at the end of this file. See:
# https://www.jedec.org/standards-documents/docs/module4_20_26
################################################################################

# What is the rated frequency (MT/s) of the DDR4 memory in your system?
# (2400, 2667)
%define memory_freq 2667

# At what speed is your DDR4 memory running? For example, its rated frequency
# might be 2667 MT/s, but it is running at a down-clocked speed of 2400.
# (2400, 2667)
%define memory_freq_actual 2667

# Specify the number of DDR4 memory sticks in your SUT:
%define mem_sticks 16

# What is your DDR4 memory rank? (1, 2 or 4)
%define memory_rank 4

# What is the DDR4 device organization bit width? (4 or 8)
%define dram_bus_width 4

# What is the DDR4 speed grade (CL-tRCD-tRP)?
# J = 10-10-10
# K = 11-11-11
# L = 12-12-12
# M = 13-13-13
# N = 14-14-14
# P = 15-15-15
# R = 16-16-16
# T = 17-17-17
# U = 18-18-18
# V = 19-19-19
%define memory_speed_grade V

# What is the DDR4 module type?  The value is appended as the "-<type>" suffix
# of the module description in hw_memory000 (e.g. "-L").
%define mem_module_type L

################################################################################
# You should not need to change anything in this section unless you have a non-
# standard configuration.  See Readme.amd1704na-rate-revC for more information.
# You will likely have to edit the Test Information further down.
################################################################################
# Per-DIMM capacity and total physical core count, derived from the fields
# above.  mem_stick_size is used when composing hw_memory000.
# NOTE(review): cpucores has the same definition as physical_core_count and is
# not referenced elsewhere in the visible part of this file.
%define mem_stick_size %{memory_size} / %{mem_sticks}
%define cpucores %{cores_per_socket} * %{num_sockets}

# Naples is only available in one or two socket configurations:
%if ((%{num_sockets} != 1) && (%{num_sockets} != 2))
%  error Please define "num_sockets" to either 1 or 2 in %{inc_file_name}. This error was generated
%endif

# This config file supports 1, 2, 4, 8, 16, 24 and 32 physical cores / socket.
# cores_per_node is only defined when cores_per_socket passes this check; the
# error message names the macro the user actually has to edit
# (cores_per_socket), not the derived cores_per_node.
%if ((%{cores_per_socket} == 1) || (%{cores_per_socket} == 2) || (%{cores_per_socket} == 4) || (%{cores_per_socket}==8) || (%{cores_per_socket}==16) || (%{cores_per_socket}==24) || (%{cores_per_socket}==32))
%  define cores_per_node %{cores_per_socket} / %{nodes_per_socket}
%else
%  error Please define "cores_per_socket" to 1, 2, 4, 8, 16, 24 or 32 in %{inc_file_name}. This error was generated
%endif

# Calculate the total number of logical cores: two per physical core when
# "smt" is "enabled", one when it is "disabled"; any other value is rejected.
%define physical_core_count %{cores_per_socket} * %{num_sockets}
%if ('%{smt}' eq 'enabled')
%  define threadspercore 2
%  define logical_core_count 2 * %{physical_core_count}
%elif ('%{smt}' eq 'disabled')
%  define threadspercore 1
%  define logical_core_count %{physical_core_count}
%else
%  error Please define "smt" to either "enabled" or "disabled" in %{inc_file_name}. This error was generated
%endif
# Logical cores per NUMA node (used as threads_per_node in the generated bind
# script).  Falls back to 1 when there are fewer logical cores than nodes:
%define total_nodes %{nodes_per_socket} * %{num_sockets}
%if (%{logical_core_count} >= %{total_nodes})
%  define logical_cores_per_node %{logical_core_count} / %{total_nodes}
%else
%  define logical_cores_per_node 1
%endif

# Calculate the amount of memory per logical core:
%define GB_mem_per_core %{memory_size} / %{logical_core_count}

# CPU2017 requires at least 4 GiB per logical core (one copy runs per logical
# core, so this is also the per-copy figure quoted in the error message):
%if %{GB_mem_per_core} < 4
%  error These SPEC CPU binaries require at least 4 GiB of memory per copy. The value for "memory_size" in %{inc_file_name} indicates you have not met this requirement. This error was generated
%endif

# Do you want to use hugectl? Define the hugectl command here.  The macro is
# left empty below, so nothing is inserted where %{hugectl_cmd} appears in
# run_command:
#%define hugectl_cmd hugectl -heap
%define hugectl_cmd

# The automatic huge-page sizing below is disabled (commented out); the fixed
# values that follow it are used instead.
# %if %{GB_mem_per_core} >= 4
# %  define huge_pages_per_copy  896     # Number of huge pages / copy
# %  define num_system_huge_pages %{logical_core_count} * %{huge_pages_per_copy}
# %  define thp_setting disabled
# %elif %{GB_mem_per_core} >= 3
# %  define huge_pages_per_copy  480     # Number of huge pages / copy
# %  define num_system_huge_pages %{logical_core_count} * %{huge_pages_per_copy}
# %  define thp_setting disabled
# %else
# %  define num_system_huge_pages 0
# %  define thp_setting enabled
# %endif

# No explicit huge pages; transparent huge pages enabled.  Both values select
# which notes_os_* lines are emitted further down.
%define num_system_huge_pages 0
%define thp_setting enabled

# NOTE(review): the chip_total_* macros below are not referenced in the
# visible part of this file; the hw_pcache/hw_scache fields are literal text.

# Naples has 64kB L1i / physical core:
%define chip_total_L1i 64 * %{cores_per_socket}

# Naples has 32kB L1d / physical core:
%define chip_total_L1d 32 * %{cores_per_socket}

# Naples has 512 kB L2 / core, i.e. 0.5 MB per core, hence cores / 2:
%define chip_total_L2_MB %{cores_per_socket} / 2

################################################################################
# Test information
#
# Most of the lines below will need to be modified to reflect your test
# conditions.
################################################################################
# Tester information
# To understand the difference between hw_vendor/sponsor/tester, see:
# www.spec.org/auto/cpu2017/Docs/config.html#test_sponsor
################################################################################
# Report identification fields.  hw_model is split across two numbered lines
# (000, 001).
license_num              =3
tester                   =HPE
test_sponsor             =HPE
hw_vendor                =Hewlett Packard Enterprise
hw_model000              =ProLiant DL385 Gen10
hw_model001              =(2.00 GHz, AMD EPYC 7401)

#--------- If you install new compilers, edit this section --------------------
# C/C++ is built with AOCC (clang); Fortran is built with GCC (gfortran).
sw_compiler000           =C/C++: Version 1.0.0 of AOCC
sw_compiler001           =Fortran: Version 4.8.2 of GCC

################################################################################
# Hardware information
################################################################################
                          # Example                             # Brief info about field
hw_avail                 =Nov-2017
sw_avail                 =Sep-2017
#hw_cpu_name              =AMD EPYC XXxx                         # Usually set correctly by sysinfo
hw_cpu_nominal_mhz       =2000
hw_cpu_max_mhz           =3000
# NOTE(review): hw_ncores and hw_nthreadspercore are literals.  They agree
# with cores_per_socket * num_sockets (24 * 2) and smt = enabled as set in the
# required fields, and must be edited by hand if those fields change.
hw_ncores                =48
hw_nthreadspercore       =2
# num_sockets has already been validated to be 1 or 2, so there is no %else:
%if %{num_sockets} == 1
 hw_ncpuorder            =1 chip
%elif %{num_sockets} == 2
 hw_ncpuorder            =1, 2 chip(s)
%endif #num_sockets

hw_other                 =None                                  # Other perf-relevant hw, or "None"
# sw_other is split across two numbered lines (package name, then version):
sw_other000              =jemalloc general purpose malloc implementation
sw_other001 = v4.5.0
fw_bios                  =HPE BIOS Version A40 released Nov-2017 (tested with A40 (11/10/2017))

sw_base_ptrsize          =64-bit
sw_peak_ptrsize          =Not Applicable

# Cache descriptions (literal text; L1 and L2 figures match the per-core sizes
# stated in the Naples cache comments earlier in this file):
hw_pcache                =64 KB I + 32 KB D on chip per core
hw_scache                =512 KB I+D on chip per core
hw_tcache                =64 MB I+D on chip per chip, 8 MB shared / 3 cores
hw_ocache                =None

################################################################################
# Notes
################################################################################
# General notes.  Line numbers are spaced in steps of 5; lines with an empty
# value serve as blank separator lines between topics.  Leading spaces inside
# the values (notes_065 - notes_080) are intentional indentation.
notes_020                =The AMD64 AOCC Compiler Suite is available at
notes_025                =http://developer.amd.com/tools-and-sdks/cpu-development/amd-optimizing-cc-compiler/
notes_030                =
notes_035                =Binaries were compiled on a system with 2x AMD EPYC 7601 CPU + 512GB Memory using RHEL 7.4
notes_040                =
notes_045                =jemalloc, a general purpose malloc implementation, was obtained at
notes_050                =https://github.com/jemalloc/jemalloc/releases/download/4.5.0/jemalloc-4.5.0.tar.bz2
notes_055                =jemalloc was built with GCC v4.8.5 in RHEL v7.2 under default conditions.
notes_060                =jemalloc uses environment variable MALLOC_CONF with values narenas and lg_chunk:
notes_065                =  narenas: sets the maximum number of arenas to use for automatic multiplexing
notes_070                =           of threads and arenas.
notes_075                =  lg_chunk: set the virtual memory chunk size (log base 2). For example,
notes_080                =            lg_chunk:21 sets the default chunk size to 2^21 = 2MiB.
notes_085                =
notes_090                =The AOCC Gold Linker plugin was installed and used for the link stage.
notes_095                =
notes_100                =The AOCC Fortran Plugin version 1.0 was used to leverage AOCC optimizers
notes_105                =with gfortran. It is available here:
notes_110                =http://developer.amd.com/amd-aocc/

# Submit notes: describe how copies are bound (see the submit command and the
# generated bind script later in this file).
notes_submit_000         ='numactl' was used to bind copies to the cores.
notes_submit_005         =See the configuration file for details.

# Operating system notes.  notes_os_065 and later are assigned by the
# conditional blocks that follow this list.
# The harness command is named "runcpu" here, matching the invocation command
# line recorded at the top of this file and notes_os_045.
notes_os_000             ='ulimit -s unlimited' was used to set environment stack size
notes_os_005             ='ulimit -l 2097152' was used to set environment locked pages in memory limit
notes_os_010             =
notes_os_015             =runcpu command invoked through numactl i.e.:
notes_os_020             =numactl --interleave=all runcpu <etc>
notes_os_025             =
notes_os_030             =Set dirty_ratio=8 to limit dirty cache to 8% of memory
notes_os_035             =Set swappiness=1 to swap only if necessary
notes_os_040             =Set zone_reclaim_mode=1 to free local node memory and avoid remote memory
notes_os_045             =sync then drop_caches=3 to reset caches before invoking runcpu
notes_os_050             =Linux governor set to performance with cpupower "cpupower frequency-set -r -g performance"
notes_os_055             =dirty_ratio, swappiness, zone_reclaim_mode and drop_caches were
notes_os_060             =all set using privileged echo (e.g. echo 1 > /proc/sys/vm/swappiness).

# Document the transparent huge page setting.  Only the "enabled" value has a
# note; any other value produces a preprocessor warning instead.
%if ('%{thp_setting}' eq 'enabled')
 notes_os_065            =
 notes_os_070            =Transparent huge pages were enabled for this run (OS default)
%else
% warning Transparent huge pages define not set properly, so its setting was not included in this run's documentation.  This warning was generated
%endif #thp_setting

# Document the explicit huge page configuration.  Both branches use
# notes_os_075 - notes_os_085 so that neither overwrites the notes_os_020 -
# notes_os_030 lines (numactl and dirty_ratio notes) assigned earlier.
%if %{num_system_huge_pages} != 0
  notes_os_075           =
  notes_os_080           =Set vm/nr_hugepages=%{num_system_huge_pages} in /etc/sysctl.conf
  notes_os_085           =mount -t hugetlbfs nodev /mnt/hugepages
%else
  notes_os_075           =
  notes_os_080           =Huge pages were not configured for this run.
  notes_os_085           =
%endif #num_system_huge_pages

intrate,fprate:
# the following fields must be set here or they will be overwritten by sysinfo:
hw_disk                  =1 x 400 GB SAS SSD, RAID 0

# Memory description.  memory_freq must be 2400 or 2667.
#  - Rated and actual frequency differ: the description is composed from the
#    memory macros and continues onto hw_memory001, with one parenthesis pair
#    spanning both lines: "... PC4-<freq><grade>-<type>, running at" / "<actual>)".
#  - Rated and actual frequency match: a literal description is used.
#    NOTE(review): this literal is not derived from the memory macros and must
#    be edited by hand for other memory configurations.
%if (('%{memory_freq}' eq '2400') || ('%{memory_freq}' eq '2667'))
% if (%{memory_freq_actual} != %{memory_freq})
   hw_memory000          =%{memory_size} GB (%{mem_sticks} x %{mem_stick_size} GB %{memory_rank}Rx%{dram_bus_width} PC4-%{memory_freq}%{memory_speed_grade}-%{mem_module_type}, running at
   hw_memory001          =%{memory_freq_actual})
% else
   hw_memory000          =1 TB (16 x 64 GB 4Rx4 PC4-2666V-L)
   hw_memory001          =
% endif
%else
% error Please define "memory_freq" to a valid value in %{inc_file_name}.  This error was generated
%endif #memory_freq

# If you do not want to use the memory information constructed above, override
# them here by uncommenting the following example fields and fill them out
# according to: nnn GB (i x j GB kRxl PC4-mmmmn-o). See:
# https://www.jedec.org/standards-documents/docs/module4_20_26
# Example:
#hw_memory000            =512 GB (16 x 32 GB 2Rx4 PC4-2667V-R, running at
#hw_memory001            =2400)
# The following field must be made blank or sysinfo will output placeholder
# information in it:
hw_memory002             =

# NOTE(review): hw_nchips is a literal; it agrees with num_sockets (2) as set
# in the required fields and must be edited by hand if that field changes.
hw_nchips                =2
prepared_by              =HPE Performance Engineering
sw_file                  =xfs
# Operating system description; sw_os002 is intentionally left blank:
sw_os000                 =SUSE Linux Enterprise Server 12 (x86_64) SP3
sw_os001                 =Kernel 4.4.73-5-default
sw_os002                 =
sw_state                 =Run level 3 (multi-user)
# ---- End inclusion of '/home/cpu2017/config/AMD1704na-rate-revC.inc'

# Switch back to default block after the include file (the include file ends
# inside the "intrate,fprate:" section):
default:
################################################################################
# Auto-configured settings based on your .inc file
################################################################################
# "copies" applies only to intrate and fprate.  One copy is run per logical
# core; some peak benchmark sections later override this with
# %{physical_core_count}:
copies                  = %{logical_core_count}

# Verify computed logical core count.  The upper bound of 128 corresponds to
# the largest supported configuration (2 sockets x 32 cores x 2 threads):
%if (%{logical_core_count} < 1) || (%{logical_core_count} > 128)
%  error The number of logical cores is calculated to be %{logical_core_count}, which is invalid. Check "num_sockets" and "cores_per_socket". This error was generated
%endif

#%if %{num_system_huge_pages} > 0
#  preENV_HUGETLB_LIMIT = %{huge_pages_per_copy}
#%endif

###############################################################################
###############################################################################
# The following lines create a bash script that is used to calculate the NUMA
# node number and the physical core number for setting the affinity for each
# benchmark copy.  Be careful to protect (with "\") all "$" characters that need
# to be in the shell script. Also, do not use single quotes since that character
# is used during dynamic script creation.
#
# Note that the generated script assumes that each socket has the same number of
# nodes and that each node services the same number of logical cores and that
# both logical cores and NUMA nodes are enumerated sequentially.
#
# Name the script files uniquely for each copy:
%define bindcmdname run.$SPECCOPYNUM.sh
# Start the script:
%define bindcmd01 \#!/bin/bash
# The first argument passed into the script is the copy number (the submit
# command passes $SPECCOPYNUM).  Read this argument and skip to the next:
%define bindcmd02 threadnum=\$1; shift
# Set the script variable threads_per_node based upon logical_cores_per_node
# in this config file:
%define bindcmd03 threads_per_node=%{logical_cores_per_node}
# Calculate the node for the thread.  nodenum is not referenced by run_command
# below, which uses --localalloc rather than naming a node explicitly:
%define bindcmd04 ((nodenum=threadnum/threads_per_node))
# Search for the delimiter "--" that precedes $command
%define bindcmd05 \# Skip to command...
%define bindcmd06 while [[ \$1 != -- ]]; do
%define bindcmd07   shift
%define bindcmd08 done
%define bindcmd09 shift
%define bindcmd10 \# What is left in \$* are the commands to execute
# Create the command to launch the executable: bind the copy to the logical
# CPU whose number equals the copy number and allocate memory on the local
# node.  %{hugectl_cmd} is inserted before the benchmark command:
%define run_command exec numactl --localalloc --physcpubind=\$threadnum %{hugectl_cmd} \$*
# echo this command to a file (run.<copy>.out) for debug purposes:
%define bindcmd11 echo %{run_command} > run.$SPECCOPYNUM.out 2>&1
# This line executes the fully formed command for the current copy:
%define bindcmd12 %{run_command}

# The submit writes out the script, provides it with arguments calculated above,
# and runs it.
#
# We are building our submit command based upon the bindcmd# lines that we
# created above. Note that the numbers have to match exactly or the script will
# not run properly. Also note that <<EOS...EOS is a block-delimited line
# continuation feature of the SPEC CPU config file, and its contents are not
# otherwise parsed.
#
# The first echo uses ">" to create/truncate the per-copy script and the
# remaining echoes append to it with ">>".  The script is then invoked as
#   /bin/bash ./run.<copy>.sh <copy> -- <command>
# which matches the argument handling in bindcmd02 and bindcmd06 - bindcmd09.
# NOTE(review): the "#" lines inside the block are left untouched because the
# block contents may be passed through to the shell verbatim.
submit=<<EOS
echo '%{bindcmd01}' >  %{bindcmdname}
echo '%{bindcmd02}' >> %{bindcmdname}
echo '%{bindcmd03}' >> %{bindcmdname}
echo '%{bindcmd04}' >> %{bindcmdname}
echo '%{bindcmd05}' >> %{bindcmdname}
echo '%{bindcmd06}' >> %{bindcmdname}
echo '%{bindcmd07}' >> %{bindcmdname}
echo '%{bindcmd08}' >> %{bindcmdname}
echo '%{bindcmd09}' >> %{bindcmdname}
echo '%{bindcmd10}' >> %{bindcmdname}
echo '%{bindcmd11}' >> %{bindcmdname}
echo '%{bindcmd12}' >> %{bindcmdname}
# Now we echo the command to execute the script to a debug file:
%define script_run_command exec /bin/bash ./%{bindcmdname} $SPECCOPYNUM -- $command
echo %{script_run_command} > run.$SPECCOPYNUM.cmd
# And now we execute the script we just created:
%{script_run_command}
EOS

################################################################################
################################################################################

################################
# End auto-configured settings #
################################

################################################################################
# Compilers
################################################################################
default:
# Compiler drivers: clang/clang++ for C and C++, gfortran for Fortran.
CC = clang
CXX = clang++
FC = gfortran
# clang drives the link step for both C and Fortran programs:
CLD = clang
FLD = clang
# Option passed to each compiler to report its version:
CC_VERSION_OPTION = --version
CXX_VERSION_OPTION = --version
FC_VERSION_OPTION = --version


################################################################################
# Portability Flags
################################################################################
default:  # data model applies to all benchmarks
EXTRA_PORTABILITY = -DSPEC_LP64

# Benchmark-specific portability (anything other than data model
# is allowed only where need is proven)

# Per-benchmark portability flags.  Speed (6xx) benchmark names appear in some
# section headers even though this file targets the rate benchmarks.
500.perlbench_r,600.perlbench_s:  #lang='C'
PORTABILITY = -DSPEC_LINUX_X64

521.wrf_r,621.wrf_s:  #lang='F,C'
CPORTABILITY = -DSPEC_CASE_FLAG
FPORTABILITY = -fconvert=big-endian

523.xalancbmk_r,623.xalancbmk_s:  #lang='CXX'
PORTABILITY = -DSPEC_LINUX

526.blender_r:  #lang='CXX,C'
CPORTABILITY = -funsigned-char
CXXPORTABILITY = -D__BOOL_DEFINED

527.cam4_r,627.cam4_s:  #lang='F,C'
PORTABILITY = -DSPEC_CASE_FLAG

628.pop2_s:  #lang='F,C'
CPORTABILITY = -DSPEC_CASE_FLAG
FPORTABILITY = -fconvert=big-endian

################################################################################
# Tuning Flags
################################################################################

#####################
# Base tuning flags #
#####################

default=base:
# Base optimization flags for every benchmark.  Long values are continued
# with a trailing "\"; do not insert comments between continued lines.
# Note that the C flags include -mno-avx2 while the C++ flags do not.
COPTIMIZE                = -O3 -flto -ffast-math -march=znver1 -fstruct-layout=2 \
                           -mllvm -unroll-threshold=100 -fremap-arrays \
                            -mno-avx2  -mllvm -inline-threshold=1000 \
                            -mllvm  -disable-vect-cmp
CXXOPTIMIZE              = -O3 -flto -march=znver1  \
                           -mllvm -unroll-threshold=100 -finline-aggressive \
                           -fremap-arrays -mllvm -inline-threshold=1000 \
                           -mllvm  -disable-vect-cmp
FOPTIMIZE                = -O3 -mavx -madx -funroll-loops -ffast-math
# Fortran is compiled by gfortran with the dragonegg plugin; the quoted string
# is handed to the plugin as LLVM options:
EXTRA_FFLAGS             = -fplugin=dragonegg.so \
                           -fplugin-arg-dragonegg-llvm-option="-merge-constant -disable-vect-cmp"
EXTRA_FLIBS              = -lgfortran -lamdlibm -lm
EXTRA_LDFLAGS            = -flto -Wl,-plugin-opt=-merge-constant \
                           -Wl,-plugin-opt=-lsr-in-nested-loop \
                           -Wl,-plugin-opt=-disable-vect-cmp
# All binaries link against jemalloc:
EXTRA_LIBS               = -ljemalloc
# The following is necessary for 502/602 gcc:
LDOPTIMIZE               = -z muldefs

########################
# intrate tuning flags #
########################
intrate:
# Fortran flags for the integer rate suite.  The quoted dragonegg option
# string spans two continued lines; keep the quotes balanced when editing.
EXTRA_FFLAGS             = -Ofast -fdefault-integer-8 -fplugin=dragonegg.so \
                           -fplugin-arg-dragonegg-llvm-option="-lsr-in-nested-loop \
                           -enable-iv-split -merge-constant -inline-threshold:1000 -disable-vect-cmp"
# jemalloc chunk size for intrate: 2^26 bytes = 64 MiB (lg_chunk is log base 2,
# as described in the notes section):
preENV_MALLOC_CONF       = lg_chunk:26

#######################
# fprate tuning flags #
#######################
fprate:
# jemalloc chunk size for fprate: 2^28 bytes = 256 MiB:
preENV_MALLOC_CONF       = lg_chunk:28

#####################
# Peak tuning flags #
#####################

default=peak:
# Peak optimization flags applied to every benchmark unless a benchmark
# section below overrides them.
COPTIMIZE                = -Ofast -flto -march=znver1 -fstruct-layout=3 \
                           -mllvm -vectorize-memory-aggressively -mno-avx2 \
                           -mllvm -unroll-threshold=100 -fremap-arrays \
                           -mllvm -inline-threshold=1000

CXXOPTIMIZE              = -Ofast -flto -march=znver1 -finline-aggressive \
                           -mllvm -unroll-threshold=100 -fremap-arrays \
                           -mllvm -inline-threshold=1000

FOPTIMIZE                = -O3 -mavx2 -madx -funroll-loops -ffast-math
EXTRA_FFLAGS             = -fplugin=dragonegg.so \
                           -fplugin-arg-dragonegg-llvm-option="-merge-constant -inline-threshold:1000"
EXTRA_FLIBS              = -lgfortran -lamdlibm -lm
EXTRA_LDFLAGS            = -flto -Wl,-plugin-opt=-merge-constant \
                           -Wl,-plugin-opt=-lsr-in-nested-loop
EXTRA_LIBS               = -ljemalloc
# Feedback-directed optimization is off by default and switched on per
# benchmark below (feedback = 1).  The PASS1/PASS2 flags are the
# profile-generate / profile-use options for each language:
feedback                 = 0
PASS1_CFLAGS             = -fprofile-instr-generate
PASS2_CFLAGS             = -fprofile-instr-use
PASS1_FFLAGS             = -fprofile-generate
PASS2_FFLAGS             = -fprofile-use
PASS1_CXXFLAGS           = -fprofile-instr-generate
PASS2_CXXFLAGS           = -fprofile-instr-use
PASS1_LDFLAGS            = -fprofile-instr-generate
PASS2_LDFLAGS            = -fprofile-instr-use
# Training run: execute the benchmark command, then merge the resulting
# *.profraw files into default.profdata:
fdo_run1                 = $command ; llvm-profdata merge -output=default.profdata *.profraw

500.perlbench_r=peak:  #lang='C'
feedback = 1

502.gcc_r=peak:  #lang='C'
EXTRA_PORTABILITY        = -D_FILE_OFFSET_BITS=64
EXTRA_COPTIMIZE          = -fgnu89-inline
CC                       = clang -m32
CLD                      = clang -m32
EXTRA_LIBS               = -L$[JEMALLOC_LIB32_PATH] -ljemalloc
copies                   = %{physical_core_count}

503.bwaves_r=peak:
copies                   = %{physical_core_count}

510.parest_r=peak:
copies                   = %{physical_core_count}

519.lbm_r=peak:
copies                   = %{physical_core_count}

# 521.wrf_r and 621.wrf_s, peak tuning (one section shared by both names).
# The C and Fortran optimization flags are replaced with a simpler -O3 set:
#   * COPTIMIZE here is just -O3 -mavx -ffast-math, instead of the longer
#     -Ofast/-flto/-march=znver1 list assigned to COPTIMIZE earlier in this
#     file.
#   * FOPTIMIZE uses -mavx where the earlier FOPTIMIZE used -mavx2 -madx.
# NOTE(review): the reason for limiting this benchmark to AVX is not recorded
# in the file -- confirm before changing.
521.wrf_r,621.wrf_s=peak:  #lang='F,C'
COPTIMIZE                = -O3 -mavx -ffast-math
FOPTIMIZE                = -O3 -mavx  -funroll-loops -ffast-math
# Copy count is taken from the physical_core_count macro (defined elsewhere).
copies                   = %{physical_core_count}

# 523.xalancbmk_r, peak tuning: built as a 32-bit binary.
# Both the C++ compiler (CXX) and the C++ linker driver (CXXLD) are
# `clang++ -m32`, so the allocator library must also come from the 32-bit
# library directory.
523.xalancbmk_r=peak:  #lang='CXX'
# Request 64-bit file offsets (large-file support) in the 32-bit build.
EXTRA_PORTABILITY        = -D_FILE_OFFSET_BITS=64
CXX                      = clang++ -m32
CXXLD                    = clang++ -m32
# Replaces the plain `-ljemalloc` set earlier in this file with an explicit
# search path.  JEMALLOC_LIB32_PATH is defined outside the part of the file
# shown here.
EXTRA_LIBS               = -L$[JEMALLOC_LIB32_PATH] -ljemalloc

# 525.x264_r, peak tuning.
# Turns on feedback-directed optimization (FDO) for this benchmark, in
# contrast to the `feedback = 0` set earlier in this file.  With FDO on, the
# PASS1_*/PASS2_* flags and the fdo_run1 command defined earlier are the ones
# that apply to the two-pass build.
525.x264_r=peak:  #lang='C'
feedback                 = 1

# 541.leela_r, peak tuning.
# CXXOPTIMIZE is replaced for this benchmark.  Compared with the CXXOPTIMIZE
# assigned earlier in this file it keeps -Ofast -flto -march=znver1 and
# -unroll-threshold=100, adds -mllvm -unroll-count=8, and leaves out
# -finline-aggressive, -fremap-arrays and -inline-threshold=1000.
# Feedback-directed optimization is also switched on (feedback = 1).
541.leela_r=peak: #lang="C++"
CXXOPTIMIZE              = -Ofast -flto -march=znver1 -mllvm -unroll-count=8 \
                           -mllvm -unroll-threshold=100
feedback = 1

# 549.fotonik3d_r, peak tuning: only the number of copies is changed.
# The value comes from the physical_core_count macro, which is defined outside
# the part of the file shown here (presumably the count of physical cores,
# i.e. no SMT siblings -- confirm against the macro definition).
549.fotonik3d_r=peak:
copies                   = %{physical_core_count}

# 554.roms_r, peak tuning: only the number of copies is changed.
# The value comes from the physical_core_count macro, which is defined outside
# the part of the file shown here (presumably the count of physical cores,
# i.e. no SMT siblings -- confirm against the macro definition).
554.roms_r=peak:
copies                   = %{physical_core_count}


# The following settings were obtained by running the sysinfo_program
# 'specperl $[top]/bin/sysinfo' (sysinfo:SHA:ecd2bef08f316af97f5a7768b641e2a3307c1b4b68efb5a57fa76367d790d233)
default:
notes_plat_sysinfo_000 = Sysinfo program /home/cpu2017/bin/sysinfo
notes_plat_sysinfo_005 = Rev: r5797 of 2017-06-14 96c45e4568ad54c135fd618bcc091c0f
notes_plat_sysinfo_010 = running on dl385g10-2 Mon Dec  4 05:41:50 2017
notes_plat_sysinfo_015 =
notes_plat_sysinfo_020 = SUT (System Under Test) info as seen by some common utilities.
notes_plat_sysinfo_025 = For more information on this section, see
notes_plat_sysinfo_030 =    https://www.spec.org/cpu2017/Docs/config.html#sysinfo
notes_plat_sysinfo_035 =
notes_plat_sysinfo_040 = From /proc/cpuinfo
notes_plat_sysinfo_045 =    model name : AMD EPYC 7401 24-Core Processor
notes_plat_sysinfo_050 =       2  "physical id"s (chips)
notes_plat_sysinfo_055 =       96 "processors"
notes_plat_sysinfo_060 =    cores, siblings (Caution: counting these is hw and system dependent. The following
notes_plat_sysinfo_065 =    excerpts from /proc/cpuinfo might not be reliable.  Use with caution.)
notes_plat_sysinfo_070 =       cpu cores : 24
notes_plat_sysinfo_075 =       siblings  : 48
notes_plat_sysinfo_080 =       physical 0: cores 0 1 2 3 4 5
notes_plat_sysinfo_085 =       physical 1: cores 0 1 2 3 4 5
notes_plat_sysinfo_090 =
notes_plat_sysinfo_095 = From lscpu:
notes_plat_sysinfo_100 =      Architecture:          x86_64
notes_plat_sysinfo_105 =      CPU op-mode(s):        32-bit, 64-bit
notes_plat_sysinfo_110 =      Byte Order:            Little Endian
notes_plat_sysinfo_115 =      CPU(s):                96
notes_plat_sysinfo_120 =      On-line CPU(s) list:   0-95
notes_plat_sysinfo_125 =      Thread(s) per core:    2
notes_plat_sysinfo_130 =      Core(s) per socket:    24
notes_plat_sysinfo_135 =      Socket(s):             2
notes_plat_sysinfo_140 =      NUMA node(s):          8
notes_plat_sysinfo_145 =      Vendor ID:             AuthenticAMD
notes_plat_sysinfo_150 =      CPU family:            23
notes_plat_sysinfo_155 =      Model:                 1
notes_plat_sysinfo_160 =      Model name:            AMD EPYC 7401 24-Core Processor
notes_plat_sysinfo_165 =      Stepping:              2
notes_plat_sysinfo_170 =      CPU MHz:               2000.000
notes_plat_sysinfo_175 =      CPU max MHz:           2000.0000
notes_plat_sysinfo_180 =      CPU min MHz:           1200.0000
notes_plat_sysinfo_185 =      BogoMIPS:              3992.30
notes_plat_sysinfo_190 =      Virtualization:        AMD-V
notes_plat_sysinfo_195 =      L1d cache:             32K
notes_plat_sysinfo_200 =      L1i cache:             64K
notes_plat_sysinfo_205 =      L2 cache:              512K
notes_plat_sysinfo_210 =      L3 cache:              8192K
notes_plat_sysinfo_215 =      NUMA node0 CPU(s):     0-5,48-53
notes_plat_sysinfo_220 =      NUMA node1 CPU(s):     6-11,54-59
notes_plat_sysinfo_225 =      NUMA node2 CPU(s):     12-17,60-65
notes_plat_sysinfo_230 =      NUMA node3 CPU(s):     18-23,66-71
notes_plat_sysinfo_235 =      NUMA node4 CPU(s):     24-29,72-77
notes_plat_sysinfo_240 =      NUMA node5 CPU(s):     30-35,78-83
notes_plat_sysinfo_245 =      NUMA node6 CPU(s):     36-41,84-89
notes_plat_sysinfo_250 =      NUMA node7 CPU(s):     42-47,90-95
notes_plat_sysinfo_255 =      Flags:                 fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov
notes_plat_sysinfo_260 =      pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm
notes_plat_sysinfo_265 =      constant_tsc rep_good nopl nonstop_tsc extd_apicid amd_dcm aperfmperf eagerfpu pni
notes_plat_sysinfo_270 =      pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c
notes_plat_sysinfo_275 =      rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch
notes_plat_sysinfo_280 =      osvw skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_l2 mwaitx arat cpb
notes_plat_sysinfo_285 =      hw_pstate npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists
notes_plat_sysinfo_290 =      pausefilter pfthreshold vmmcall avic fsgsbase bmi1 avx2 smep bmi2 rdseed adx smap
notes_plat_sysinfo_295 =      clflushopt sha_ni xsaveopt xsavec xgetbv1 clzero irperf overflow_recov succor smca
notes_plat_sysinfo_300 =
notes_plat_sysinfo_305 = /proc/cpuinfo cache data
notes_plat_sysinfo_310 =    cache size : 512 KB
notes_plat_sysinfo_315 =
notes_plat_sysinfo_320 = From numactl --hardware  WARNING: a numactl 'node' might or might not correspond to a
notes_plat_sysinfo_325 = physical chip.
notes_plat_sysinfo_330 =   available: 8 nodes (0-7)
notes_plat_sysinfo_335 =   node 0 cpus: 0 1 2 3 4 5 48 49 50 51 52 53
notes_plat_sysinfo_340 =   node 0 size: 128775 MB
notes_plat_sysinfo_345 =   node 0 free: 128542 MB
notes_plat_sysinfo_350 =   node 1 cpus: 6 7 8 9 10 11 54 55 56 57 58 59
notes_plat_sysinfo_355 =   node 1 size: 129021 MB
notes_plat_sysinfo_360 =   node 1 free: 128795 MB
notes_plat_sysinfo_365 =   node 2 cpus: 12 13 14 15 16 17 60 61 62 63 64 65
notes_plat_sysinfo_370 =   node 2 size: 129021 MB
notes_plat_sysinfo_375 =   node 2 free: 128818 MB
notes_plat_sysinfo_380 =   node 3 cpus: 18 19 20 21 22 23 66 67 68 69 70 71
notes_plat_sysinfo_385 =   node 3 size: 129021 MB
notes_plat_sysinfo_390 =   node 3 free: 128797 MB
notes_plat_sysinfo_395 =   node 4 cpus: 24 25 26 27 28 29 72 73 74 75 76 77
notes_plat_sysinfo_400 =   node 4 size: 129021 MB
notes_plat_sysinfo_405 =   node 4 free: 128842 MB
notes_plat_sysinfo_410 =   node 5 cpus: 30 31 32 33 34 35 78 79 80 81 82 83
notes_plat_sysinfo_415 =   node 5 size: 129021 MB
notes_plat_sysinfo_420 =   node 5 free: 128837 MB
notes_plat_sysinfo_425 =   node 6 cpus: 36 37 38 39 40 41 84 85 86 87 88 89
notes_plat_sysinfo_430 =   node 6 size: 129021 MB
notes_plat_sysinfo_435 =   node 6 free: 128841 MB
notes_plat_sysinfo_440 =   node 7 cpus: 42 43 44 45 46 47 90 91 92 93 94 95
notes_plat_sysinfo_445 =   node 7 size: 128867 MB
notes_plat_sysinfo_450 =   node 7 free: 128683 MB
notes_plat_sysinfo_455 =   node distances:
notes_plat_sysinfo_460 =   node   0   1   2   3   4   5   6   7
notes_plat_sysinfo_465 =     0:  10  16  16  16  32  32  32  32
notes_plat_sysinfo_470 =     1:  16  10  16  16  32  32  32  32
notes_plat_sysinfo_475 =     2:  16  16  10  16  32  32  32  32
notes_plat_sysinfo_480 =     3:  16  16  16  10  32  32  32  32
notes_plat_sysinfo_485 =     4:  32  32  32  32  10  16  16  16
notes_plat_sysinfo_490 =     5:  32  32  32  32  16  10  16  16
notes_plat_sysinfo_495 =     6:  32  32  32  32  16  16  10  16
notes_plat_sysinfo_500 =     7:  32  32  32  32  16  16  16  10
notes_plat_sysinfo_505 =
notes_plat_sysinfo_510 = From /proc/meminfo
notes_plat_sysinfo_515 =    MemTotal:       1056536240 kB
notes_plat_sysinfo_520 =    HugePages_Total:       0
notes_plat_sysinfo_525 =    Hugepagesize:       2048 kB
notes_plat_sysinfo_530 =
notes_plat_sysinfo_535 = /usr/bin/lsb_release -d
notes_plat_sysinfo_540 =    SUSE Linux Enterprise Server 12 SP3
notes_plat_sysinfo_545 =
notes_plat_sysinfo_550 = From /etc/*release* /etc/*version*
notes_plat_sysinfo_555 =    SuSE-release:
notes_plat_sysinfo_560 =       SUSE Linux Enterprise Server 12 (x86_64)
notes_plat_sysinfo_565 =       VERSION = 12
notes_plat_sysinfo_570 =       PATCHLEVEL = 3
notes_plat_sysinfo_575 =       # This file is deprecated and will be removed in a future service pack or release.
notes_plat_sysinfo_580 =       # Please check /etc/os-release for details about this release.
notes_plat_sysinfo_585 =    os-release:
notes_plat_sysinfo_590 =       NAME="SLES"
notes_plat_sysinfo_595 =       VERSION="12-SP3"
notes_plat_sysinfo_600 =       VERSION_ID="12.3"
notes_plat_sysinfo_605 =       PRETTY_NAME="SUSE Linux Enterprise Server 12 SP3"
notes_plat_sysinfo_610 =       ID="sles"
notes_plat_sysinfo_615 =       ANSI_COLOR="0;32"
notes_plat_sysinfo_620 =       CPE_NAME="cpe:/o:suse:sles:12:sp3"
notes_plat_sysinfo_625 =
notes_plat_sysinfo_630 = uname -a:
notes_plat_sysinfo_635 =    Linux dl385g10-2 4.4.73-5-default #1 SMP Tue Jul 4 15:33:39 UTC 2017 (b7ce4e4) x86_64
notes_plat_sysinfo_640 =    x86_64 x86_64 GNU/Linux
notes_plat_sysinfo_645 =
notes_plat_sysinfo_650 = run-level 3 Jan 2 07:07
notes_plat_sysinfo_655 =
notes_plat_sysinfo_660 = SPEC is set to: /home/cpu2017
notes_plat_sysinfo_665 =    Filesystem     Type  Size  Used Avail Use% Mounted on
notes_plat_sysinfo_670 =    /dev/sda4      xfs   331G   57G  274G  18% /home
notes_plat_sysinfo_675 =
notes_plat_sysinfo_680 = Additional information from dmidecode follows.  WARNING: Use caution when you interpret
notes_plat_sysinfo_685 = this section. The 'dmidecode' program reads system data which is "intended to allow
notes_plat_sysinfo_690 = hardware to be accurately determined", but the intent may not be met, as there are
notes_plat_sysinfo_695 = frequent changes to hardware, firmware, and the "DMTF SMBIOS" standard.
notes_plat_sysinfo_700 =   BIOS HPE A40 11/17/2017
notes_plat_sysinfo_705 =   Memory:
notes_plat_sysinfo_710 =    16x UNKNOWN NOT AVAILABLE
notes_plat_sysinfo_715 =    16x UNKNOWN NOT AVAILABLE 64 GB 4 rank 2666
notes_plat_sysinfo_720 =
notes_plat_sysinfo_725 = (End of data from sysinfo program)
# Report fields filled in by the sysinfo program from the data recorded in the
# notes_plat_sysinfo_* lines.
# NOTE(review): hw_disk, hw_memory001/hw_memory002 and sw_state still contain
# sysinfo's placeholder text ("add more disk info here", "fixme: ...",
# "(add definition here)").  Verify that real values for these fields are
# supplied elsewhere (for example in the .inc file named in the header of this
# file) before the result is published.
hw_cpu_name    = AMD EPYC 7401
hw_disk        = 331 GB  add more disk info here
hw_memory001   = 1007.591 GB fixme: If using DDR3, format is:
hw_memory002   = 'N GB (M x N GB nRxn PCn-nnnnnR-n, ECC)'
hw_nchips      = 2
prepared_by    = root  (is never output, only tags rawfile)
sw_file        = xfs
sw_os001       = SUSE Linux Enterprise Server 12 SP3
sw_os002       = 4.4.73-5-default
sw_state       = Run level 3 (add definition here)
# End of settings added by sysinfo_program


# The following section was added automatically, and contains settings that
# did not appear in the original configuration file, but were added to the
# raw file after the run.
default:
notes_000 =Environment variables set by runcpu before the start of the run:
notes_005 =LD_LIBRARY_PATH = "/home/cpu2017/amd1704-rate-libs-revC/64;/home/cpu2017/amd1704-rate-libs-revC/32:"
notes_010 =MALLOC_CONF = "lg_chunk:28"
notes_015 =
notes_plat_000 = BIOS Configuration:
notes_plat_005 =  Thermal Configuration set to Maximum Cooling
notes_plat_010 =  Performance Determinism set to Power Deterministic
notes_plat_015 =  Memory Patrol Scrubbing set to Disabled
notes_plat_020 =  Workload Profile set to General Throughput Compute
notes_plat_025 =  Minimum Processor Idle Power Core C-State set to C6 State
notes_plat_030 =  Processor Power and Utilization Monitoring set to Disabled