My personal notes on PAPI and hardware
http://icl.cs.utk.edu/papi/
! hostnamectl
Static hostname: sdumont18
Icon name: computer-server
Chassis: server
Machine ID: b2e8ad25684d44b68de42a969adb33bb
Boot ID: db5503ea9fda467baaa65b75646e5891
Operating System: Red Hat Enterprise Linux Server 7.7 (Maipo)
CPE OS Name: cpe:/o:redhat:enterprise_linux:7.7:GA:server
Kernel: Linux 3.10.0-957.27.2.el7.x86_64
Architecture: x86-64
! lspci | grep NVIDIA
3b:00.0 3D controller: NVIDIA Corporation GV100GL [Tesla V100 PCIe 32GB] (rev a1)
5e:00.0 3D controller: NVIDIA Corporation GV100GL [Tesla V100 PCIe 32GB] (rev a1)
86:00.0 3D controller: NVIDIA Corporation GV100GL [Tesla V100 PCIe 32GB] (rev a1)
af:00.0 3D controller: NVIDIA Corporation GV100GL [Tesla V100 PCIe 32GB] (rev a1)
! lscpu | egrep 'Model name|Socket|Thread|NUMA|CPU\(s\)'
CPU(s): 88
On-line CPU(s) list: 0-87
Thread(s) per core: 2
Socket(s): 2
NUMA node(s): 2
Model name: Intel(R) Xeon(R) Gold 6152 CPU @ 2.10GHz
NUMA node0 CPU(s): 0-21,44-65
NUMA node1 CPU(s): 22-43,66-87
! echo "CPU threads: $(grep -c processor /proc/cpuinfo)"
CPU threads: 88
! nproc --all
88
! cat /proc/meminfo
MemTotal: 791009752 kB
MemFree: 705361836 kB
MemAvailable: 773516612 kB
Buffers: 399644 kB
Cached: 68407188 kB
SwapCached: 0 kB
Active: 19929236 kB
Inactive: 49909108 kB
Active(anon): 1646308 kB
Inactive(anon): 243172 kB
Active(file): 18282928 kB
Inactive(file): 49665936 kB
Unevictable: 0 kB
Mlocked: 0 kB
SwapTotal: 0 kB
SwapFree: 0 kB
Dirty: 24 kB
Writeback: 0 kB
AnonPages: 1031572 kB
Mapped: 221036 kB
Shmem: 857964 kB
Slab: 8127912 kB
SReclaimable: 2029056 kB
SUnreclaim: 6098856 kB
KernelStack: 29440 kB
PageTables: 37780 kB
NFS_Unstable: 0 kB
Bounce: 0 kB
WritebackTmp: 0 kB
CommitLimit: 395504876 kB
Committed_AS: 5073604 kB
VmallocTotal: 34359738367 kB
VmallocUsed: 2426432 kB
VmallocChunk: 33954576380 kB
HardwareCorrupted: 0 kB
AnonHugePages: 339968 kB
CmaTotal: 0 kB
CmaFree: 0 kB
HugePages_Total: 0
HugePages_Free: 0
HugePages_Rsvd: 0
HugePages_Surp: 0
Hugepagesize: 2048 kB
DirectMap4k: 38993104 kB
DirectMap2M: 619157504 kB
DirectMap1G: 147849216 kB
! lsblk
lsblk: /scratch/app/anaconda3/2020.11/lib/libuuid.so.1: no version information available (required by /usr/lib64/libblkid.so.1)
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sda 8:0 0 894.3G 0 disk
├─sda1 8:1 0 2G 0 part
│ └─md0 9:0 0 2G 0 raid1 /boot/efi
├─sda2 8:2 0 2G 0 part
│ └─md1 9:1 0 2G 0 raid1 /boot
└─sda3 8:3 0 890.3G 0 part
└─md2 9:2 0 890.1G 0 raid1 /
sdb 8:16 0 894.3G 0 disk
├─sdb1 8:17 0 2G 0 part
│ └─md0 9:0 0 2G 0 raid1 /boot/efi
├─sdb2 8:18 0 2G 0 part
│ └─md1 9:1 0 2G 0 raid1 /boot
└─sdb3 8:19 0 890.3G 0 part
└─md2 9:2 0 890.1G 0 raid1 /
! module avail papi
------------------------ /usr/share/Modules/modulefiles ------------------------
papi/5.5.1.0 papi-devel/5.5.1.0
%%bash
module load papi papi-devel
module display papi papi-devel
-------------------------------------------------------------------
/usr/share/Modules/modulefiles/papi/5.5.1.0:
module-whatis loads the papi-5.5.1.0 environment
prepend-path PATH /opt/bullxde/perftools/papi/5.5.1.0/bin
append-path MANPATH /opt/bullxde/perftools/papi/5.5.1.0/man:
-------------------------------------------------------------------
-------------------------------------------------------------------
/usr/share/Modules/modulefiles/papi-devel/5.5.1.0:
module-whatis loads the papi-devel-5.5.1.0 environment
append-path MANPATH /opt/bullxde/perftools/papi/5.5.1.0/man:
prepend-path CPATH /opt/bullxde/perftools/papi/5.5.1.0/include
prepend-path LIBRARY_PATH /opt/bullxde/perftools/papi/5.5.1.0/lib64
prepend-path LD_LIBRARY_PATH /opt/bullxde/perftools/papi/5.5.1.0/lib64
prepend-path CPATH /opt/bullxde/perftools/papi/5.5.1.0/include/perfmon
-------------------------------------------------------------------
%%bash
module load papi papi-devel
papi_avail
Available PAPI preset and user defined events plus hardware information.
--------------------------------------------------------------------------------
PAPI Version : 5.5.1.0
Vendor string and code : GenuineIntel (1)
Model string and code : Intel(R) Xeon(R) Gold 6152 CPU @ 2.10GHz (85)
CPU Revision : 4.000000
CPUID Info : Family: 6 Model: 85 Stepping: 4
CPU Max Megahertz : 2101
CPU Min Megahertz : 1000
Hdw Threads per core : 2
Cores per Socket : 22
Sockets : 2
NUMA Nodes : 2
CPUs per Node : 44
Total CPUs : 88
Running in a VM : no
Number Hardware Counters : 11
Max Multiplex Counters : 384
--------------------------------------------------------------------------------
================================================================================
PAPI Preset Events
================================================================================
Name Code Avail Deriv Description (Note)
PAPI_L1_DCM 0x80000000 Yes No Level 1 data cache misses
PAPI_L1_ICM 0x80000001 Yes No Level 1 instruction cache misses
PAPI_L2_DCM 0x80000002 Yes Yes Level 2 data cache misses
PAPI_L2_ICM 0x80000003 Yes No Level 2 instruction cache misses
PAPI_L3_DCM 0x80000004 No No Level 3 data cache misses
PAPI_L3_ICM 0x80000005 No No Level 3 instruction cache misses
PAPI_L1_TCM 0x80000006 Yes Yes Level 1 cache misses
PAPI_L2_TCM 0x80000007 Yes No Level 2 cache misses
PAPI_L3_TCM 0x80000008 Yes No Level 3 cache misses
PAPI_CA_SNP 0x80000009 Yes No Requests for a snoop
PAPI_CA_SHR 0x8000000a Yes No Requests for exclusive access to shared cache line
PAPI_CA_CLN 0x8000000b Yes No Requests for exclusive access to clean cache line
PAPI_CA_INV 0x8000000c No No Requests for cache line invalidation
PAPI_CA_ITV 0x8000000d Yes No Requests for cache line intervention
PAPI_L3_LDM 0x8000000e Yes No Level 3 load misses
PAPI_L3_STM 0x8000000f No No Level 3 store misses
PAPI_BRU_IDL 0x80000010 No No Cycles branch units are idle
PAPI_FXU_IDL 0x80000011 No No Cycles integer units are idle
PAPI_FPU_IDL 0x80000012 No No Cycles floating point units are idle
PAPI_LSU_IDL 0x80000013 No No Cycles load/store units are idle
PAPI_TLB_DM 0x80000014 Yes Yes Data translation lookaside buffer misses
PAPI_TLB_IM 0x80000015 Yes No Instruction translation lookaside buffer misses
PAPI_TLB_TL 0x80000016 No No Total translation lookaside buffer misses
PAPI_L1_LDM 0x80000017 Yes No Level 1 load misses
PAPI_L1_STM 0x80000018 Yes No Level 1 store misses
PAPI_L2_LDM 0x80000019 Yes No Level 2 load misses
PAPI_L2_STM 0x8000001a Yes No Level 2 store misses
PAPI_BTAC_M 0x8000001b No No Branch target address cache misses
PAPI_PRF_DM 0x8000001c Yes No Data prefetch cache misses
PAPI_L3_DCH 0x8000001d No No Level 3 data cache hits
PAPI_TLB_SD 0x8000001e No No Translation lookaside buffer shootdowns
PAPI_CSR_FAL 0x8000001f No No Failed store conditional instructions
PAPI_CSR_SUC 0x80000020 No No Successful store conditional instructions
PAPI_CSR_TOT 0x80000021 No No Total store conditional instructions
PAPI_MEM_SCY 0x80000022 No No Cycles Stalled Waiting for memory accesses
PAPI_MEM_RCY 0x80000023 No No Cycles Stalled Waiting for memory Reads
PAPI_MEM_WCY 0x80000024 Yes No Cycles Stalled Waiting for memory writes
PAPI_STL_ICY 0x80000025 Yes No Cycles with no instruction issue
PAPI_FUL_ICY 0x80000026 Yes Yes Cycles with maximum instruction issue
PAPI_STL_CCY 0x80000027 Yes No Cycles with no instructions completed
PAPI_FUL_CCY 0x80000028 Yes No Cycles with maximum instructions completed
PAPI_HW_INT 0x80000029 No No Hardware interrupts
PAPI_BR_UCN 0x8000002a Yes Yes Unconditional branch instructions
PAPI_BR_CN 0x8000002b Yes No Conditional branch instructions
PAPI_BR_TKN 0x8000002c Yes Yes Conditional branch instructions taken
PAPI_BR_NTK 0x8000002d Yes No Conditional branch instructions not taken
PAPI_BR_MSP 0x8000002e Yes No Conditional branch instructions mispredicted
PAPI_BR_PRC 0x8000002f Yes Yes Conditional branch instructions correctly predicted
PAPI_FMA_INS 0x80000030 No No FMA instructions completed
PAPI_TOT_IIS 0x80000031 No No Instructions issued
PAPI_TOT_INS 0x80000032 Yes No Instructions completed
PAPI_INT_INS 0x80000033 No No Integer instructions
PAPI_FP_INS 0x80000034 No No Floating point instructions
PAPI_LD_INS 0x80000035 Yes No Load instructions
PAPI_SR_INS 0x80000036 Yes No Store instructions
PAPI_BR_INS 0x80000037 Yes No Branch instructions
PAPI_VEC_INS 0x80000038 No No Vector/SIMD instructions (could include integer)
PAPI_RES_STL 0x80000039 Yes No Cycles stalled on any resource
PAPI_FP_STAL 0x8000003a No No Cycles the FP unit(s) are stalled
PAPI_TOT_CYC 0x8000003b Yes No Total cycles
PAPI_LST_INS 0x8000003c Yes Yes Load/store instructions completed
PAPI_SYC_INS 0x8000003d No No Synchronization instructions completed
PAPI_L1_DCH 0x8000003e No No Level 1 data cache hits
PAPI_L2_DCH 0x8000003f No No Level 2 data cache hits
PAPI_L1_DCA 0x80000040 No No Level 1 data cache accesses
PAPI_L2_DCA 0x80000041 Yes No Level 2 data cache accesses
PAPI_L3_DCA 0x80000042 Yes Yes Level 3 data cache accesses
PAPI_L1_DCR 0x80000043 No No Level 1 data cache reads
PAPI_L2_DCR 0x80000044 Yes No Level 2 data cache reads
PAPI_L3_DCR 0x80000045 Yes No Level 3 data cache reads
PAPI_L1_DCW 0x80000046 No No Level 1 data cache writes
PAPI_L2_DCW 0x80000047 Yes Yes Level 2 data cache writes
PAPI_L3_DCW 0x80000048 Yes No Level 3 data cache writes
PAPI_L1_ICH 0x80000049 No No Level 1 instruction cache hits
PAPI_L2_ICH 0x8000004a Yes No Level 2 instruction cache hits
PAPI_L3_ICH 0x8000004b No No Level 3 instruction cache hits
PAPI_L1_ICA 0x8000004c No No Level 1 instruction cache accesses
PAPI_L2_ICA 0x8000004d Yes No Level 2 instruction cache accesses
PAPI_L3_ICA 0x8000004e Yes No Level 3 instruction cache accesses
PAPI_L1_ICR 0x8000004f No No Level 1 instruction cache reads
PAPI_L2_ICR 0x80000050 Yes No Level 2 instruction cache reads
PAPI_L3_ICR 0x80000051 Yes No Level 3 instruction cache reads
PAPI_L1_ICW 0x80000052 No No Level 1 instruction cache writes
PAPI_L2_ICW 0x80000053 No No Level 2 instruction cache writes
PAPI_L3_ICW 0x80000054 No No Level 3 instruction cache writes
PAPI_L1_TCH 0x80000055 No No Level 1 total cache hits
PAPI_L2_TCH 0x80000056 No No Level 2 total cache hits
PAPI_L3_TCH 0x80000057 No No Level 3 total cache hits
PAPI_L1_TCA 0x80000058 No No Level 1 total cache accesses
PAPI_L2_TCA 0x80000059 Yes Yes Level 2 total cache accesses
PAPI_L3_TCA 0x8000005a Yes No Level 3 total cache accesses
PAPI_L1_TCR 0x8000005b No No Level 1 total cache reads
PAPI_L2_TCR 0x8000005c Yes Yes Level 2 total cache reads
PAPI_L3_TCR 0x8000005d Yes Yes Level 3 total cache reads
PAPI_L1_TCW 0x8000005e No No Level 1 total cache writes
PAPI_L2_TCW 0x8000005f Yes Yes Level 2 total cache writes
PAPI_L3_TCW 0x80000060 Yes No Level 3 total cache writes
PAPI_FML_INS 0x80000061 No No Floating point multiply instructions
PAPI_FAD_INS 0x80000062 No No Floating point add instructions
PAPI_FDV_INS 0x80000063 No No Floating point divide instructions
PAPI_FSQ_INS 0x80000064 No No Floating point square root instructions
PAPI_FNV_INS 0x80000065 No No Floating point inverse instructions
PAPI_FP_OPS 0x80000066 No No Floating point operations
PAPI_SP_OPS 0x80000067 Yes Yes Floating point operations; optimized to count scaled single precision vector operations
PAPI_DP_OPS 0x80000068 Yes Yes Floating point operations; optimized to count scaled double precision vector operations
PAPI_VEC_SP 0x80000069 Yes Yes Single precision vector/SIMD instructions
PAPI_VEC_DP 0x8000006a Yes Yes Double precision vector/SIMD instructions
PAPI_REF_CYC 0x8000006b Yes No Reference clock cycles
--------------------------------------------------------------------------------
Of 108 possible events, 59 are available, of which 18 are derived.
avail.c PASSED
Show only the events that are available on this CPU (Avail = Yes)
%%bash
module load papi papi-devel
papi_avail | egrep 'Deriv|Yes'
Name Code Avail Deriv Description (Note)
PAPI_L1_DCM 0x80000000 Yes No Level 1 data cache misses
PAPI_L1_ICM 0x80000001 Yes No Level 1 instruction cache misses
PAPI_L2_DCM 0x80000002 Yes Yes Level 2 data cache misses
PAPI_L2_ICM 0x80000003 Yes No Level 2 instruction cache misses
PAPI_L1_TCM 0x80000006 Yes Yes Level 1 cache misses
PAPI_L2_TCM 0x80000007 Yes No Level 2 cache misses
PAPI_L3_TCM 0x80000008 Yes No Level 3 cache misses
PAPI_CA_SNP 0x80000009 Yes No Requests for a snoop
PAPI_CA_SHR 0x8000000a Yes No Requests for exclusive access to shared cache line
PAPI_CA_CLN 0x8000000b Yes No Requests for exclusive access to clean cache line
PAPI_CA_ITV 0x8000000d Yes No Requests for cache line intervention
PAPI_L3_LDM 0x8000000e Yes No Level 3 load misses
PAPI_TLB_DM 0x80000014 Yes Yes Data translation lookaside buffer misses
PAPI_TLB_IM 0x80000015 Yes No Instruction translation lookaside buffer misses
PAPI_L1_LDM 0x80000017 Yes No Level 1 load misses
PAPI_L1_STM 0x80000018 Yes No Level 1 store misses
PAPI_L2_LDM 0x80000019 Yes No Level 2 load misses
PAPI_L2_STM 0x8000001a Yes No Level 2 store misses
PAPI_PRF_DM 0x8000001c Yes No Data prefetch cache misses
PAPI_MEM_WCY 0x80000024 Yes No Cycles Stalled Waiting for memory writes
PAPI_STL_ICY 0x80000025 Yes No Cycles with no instruction issue
PAPI_FUL_ICY 0x80000026 Yes Yes Cycles with maximum instruction issue
PAPI_STL_CCY 0x80000027 Yes No Cycles with no instructions completed
PAPI_FUL_CCY 0x80000028 Yes No Cycles with maximum instructions completed
PAPI_BR_UCN 0x8000002a Yes Yes Unconditional branch instructions
PAPI_BR_CN 0x8000002b Yes No Conditional branch instructions
PAPI_BR_TKN 0x8000002c Yes Yes Conditional branch instructions taken
PAPI_BR_NTK 0x8000002d Yes No Conditional branch instructions not taken
PAPI_BR_MSP 0x8000002e Yes No Conditional branch instructions mispredicted
PAPI_BR_PRC 0x8000002f Yes Yes Conditional branch instructions correctly predicted
PAPI_TOT_INS 0x80000032 Yes No Instructions completed
PAPI_LD_INS 0x80000035 Yes No Load instructions
PAPI_SR_INS 0x80000036 Yes No Store instructions
PAPI_BR_INS 0x80000037 Yes No Branch instructions
PAPI_RES_STL 0x80000039 Yes No Cycles stalled on any resource
PAPI_TOT_CYC 0x8000003b Yes No Total cycles
PAPI_LST_INS 0x8000003c Yes Yes Load/store instructions completed
PAPI_L2_DCA 0x80000041 Yes No Level 2 data cache accesses
PAPI_L3_DCA 0x80000042 Yes Yes Level 3 data cache accesses
PAPI_L2_DCR 0x80000044 Yes No Level 2 data cache reads
PAPI_L3_DCR 0x80000045 Yes No Level 3 data cache reads
PAPI_L2_DCW 0x80000047 Yes Yes Level 2 data cache writes
PAPI_L3_DCW 0x80000048 Yes No Level 3 data cache writes
PAPI_L2_ICH 0x8000004a Yes No Level 2 instruction cache hits
PAPI_L2_ICA 0x8000004d Yes No Level 2 instruction cache accesses
PAPI_L3_ICA 0x8000004e Yes No Level 3 instruction cache accesses
PAPI_L2_ICR 0x80000050 Yes No Level 2 instruction cache reads
PAPI_L3_ICR 0x80000051 Yes No Level 3 instruction cache reads
PAPI_L2_TCA 0x80000059 Yes Yes Level 2 total cache accesses
PAPI_L3_TCA 0x8000005a Yes No Level 3 total cache accesses
PAPI_L2_TCR 0x8000005c Yes Yes Level 2 total cache reads
PAPI_L3_TCR 0x8000005d Yes Yes Level 3 total cache reads
PAPI_L2_TCW 0x8000005f Yes Yes Level 2 total cache writes
PAPI_L3_TCW 0x80000060 Yes No Level 3 total cache writes
PAPI_SP_OPS 0x80000067 Yes Yes Floating point operations; optimized to count scaled single precision vector operations
PAPI_DP_OPS 0x80000068 Yes Yes Floating point operations; optimized to count scaled double precision vector operations
PAPI_VEC_SP 0x80000069 Yes Yes Single precision vector/SIMD instructions
PAPI_VEC_DP 0x8000006a Yes Yes Double precision vector/SIMD instructions
PAPI_REF_CYC 0x8000006b Yes No Reference clock cycles
%%bash
module load papi papi-devel
papi_component_avail
Available components and hardware information.
--------------------------------------------------------------------------------
PAPI Version : 5.5.1.0
Vendor string and code : GenuineIntel (1)
Model string and code : Intel(R) Xeon(R) Gold 6152 CPU @ 2.10GHz (85)
CPU Revision : 4.000000
CPUID Info : Family: 6 Model: 85 Stepping: 4
CPU Max Megahertz : 2101
CPU Min Megahertz : 1000
Hdw Threads per core : 2
Cores per Socket : 22
Sockets : 2
NUMA Nodes : 2
CPUs per Node : 44
Total CPUs : 88
Running in a VM : no
Number Hardware Counters : 11
Max Multiplex Counters : 384
--------------------------------------------------------------------------------
Compiled-in components:
Name: perf_event Linux perf_event CPU counters
Name: perf_event_uncore Linux perf_event CPU uncore and northbridge
\-> Disabled: No uncore PMUs or events found
Name: rapl Linux RAPL energy measurements
\-> Disabled: CPU model not supported
Name: net Linux network driver statistics
Name: lustre Lustre filesystem statistics
\-> Disabled: No lustre filesystems found
Name: infiniband Linux Infiniband statistics using the sysfs interface
Active components:
Name: perf_event Linux perf_event CPU counters
Native: 162, Preset: 59, Counters: 11
PMU's supported: ix86arch, perf, perf_raw, skl
Name: net Linux network driver statistics
Native: 128, Preset: 0, Counters: 320
PMU's supported:
Name: infiniband Linux Infiniband statistics using the sysfs interface
Native: 21, Preset: 0, Counters: 21
PMU's supported:
--------------------------------------------------------------------------------
component.c PASSED
%%bash
gfortran --version
GNU Fortran (GCC) 4.8.5 20150623 (Red Hat 4.8.5-39)
Copyright (C) 2015 Free Software Foundation, Inc.
GNU Fortran comes with NO WARRANTY, to the extent permitted by law.
You may redistribute copies of GNU Fortran
under the terms of the GNU General Public License.
For more information about these matters, see the file named COPYING
%%bash
module load papi papi-devel
echo $CPATH
echo $LIBRARY_PATH
/opt/bullxde/perftools/papi/5.5.1.0/include/perfmon:/opt/bullxde/perftools/papi/5.5.1.0/include
/opt/bullxde/perftools/papi/5.5.1.0/lib64
! ls /opt/bullxde/perftools/papi/5.5.1.0/include
f77papi.h f90papi.h fpapi.h papi.h papiStdEventDefs.h
! ls /opt/bullxde/perftools/papi/5.5.1.0/lib64
libpapi.a libpapi.so.5 libpapi.so.5.5.1.0 libpfm.so libpfm.so.4.8.0
libpapi.so libpapi.so.5.5.1 libpfm.a libpfm.so.4 pkgconfig
Test
Testing one small program, only to see if PAPI works
%%writefile test01.f90
!-----------------------------------------------------------------------
! test01: time an out-of-place transpose of an N x N double precision
! matrix and report three PAPI preset counters (load instructions,
! store instructions, L3 total cache misses) collected around the
! transpose loop with the PAPI high-level Fortran interface
! (PAPIF_start_counters / PAPIF_stop_counters).
!
! Exit status is non-zero when a PAPI call fails.
!-----------------------------------------------------------------------
program test01
  implicit none
  include 'f90papi.h'

  integer, parameter :: N = 512
  ! Size of one matrix element in bytes; assumes double precision is
  ! 8 bytes (true with gfortran defaults) -- TODO confirm if compiled
  ! with flags that change the default real kinds.
  integer, parameter :: bytes_per_elem = 8
  double precision, dimension(N, N) :: a, b
  double precision :: t1, t2, elapsed, rate
  integer :: i, j
  integer, parameter :: max_event = 3
  integer, dimension(max_event) :: event
  integer(kind=8), dimension(max_event) :: values
  integer :: retval

  event(1) = PAPI_LD_INS
  event(2) = PAPI_SR_INS
  !event(x) = PAPI_L1_TCM
  !event(x) = PAPI_L2_TCM
  event(3) = PAPI_L3_TCM

  call init01(a, b, N) ! init matrix cells (outside the measured region)

  call PAPIF_start_counters(event, max_event, retval)
  if (retval /= PAPI_OK) then
    call PAPIF_perror('PAPIF_start_counters')
    stop 1
  endif

  call cpu_time(t1) ! CPU time in seconds

  ! Transpose the matrix. Kept as explicit loops on purpose: this is the
  ! kernel being measured, so it must not be replaced by an intrinsic.
  do j = 1, N
    do i = 1, N
      b(i, j) = a(j, i)
    enddo
  enddo

  call cpu_time(t2) ! CPU time in seconds

  ! Read out the PAPI counters and stop them, so nothing is left
  ! running for the rest of the program.
  call PAPIF_stop_counters(values, max_event, retval)
  if (retval /= PAPI_OK) then
    call PAPIF_perror('PAPIF_stop_counters')
    stop 1
  endif

  call check01(a, b, N) ! check the transpose

  ! Print counter values
  print*, 'PAPI_LD_INS', values(1)
  print*, 'PAPI_SR_INS', values(2)
  !print*, 'PAPI_L1_TCM', values(x)
  !print*, 'PAPI_L2_TCM', values(x)
  print*, 'PAPI_L3_TCM', values(3)

  ! Print timing. Traffic is N*N elements read plus N*N elements
  ! written, converted to bytes and then to MiB. cpu_time may return
  ! the same value twice for a very short run, so guard the division.
  elapsed = t2 - t1
  if (elapsed > 0.0d0) then
    rate = 2.0d0 * dble(N) * dble(N) * dble(bytes_per_elem) &
           / (1024.0d0 * 1024.0d0 * elapsed)
  else
    rate = 0.0d0
  endif
  print '(a, i0, a, f10.6, a, f8.1, a)', &
    "N=", N, ", T=", elapsed, " s, Rate=", rate, " MB/s"

contains

  ! Fill a with a distinct value per cell and clear b.
  ! Distinct values are required for check01 to be able to detect a
  ! wrong transpose; a uniform matrix equals its own transpose.
  subroutine init01(a, b, N)
    implicit none
    integer, intent(in) :: N
    double precision, intent(inout) :: a(N, N), b(N, N)
    integer :: i, j

    ! Column-major traversal: the first index varies fastest.
    do j = 1, N
      do i = 1, N
        a(i, j) = dble(i + (j - 1) * N)
        b(i, j) = 0.0d0
      enddo
    enddo
  end subroutine init01

  ! Verify that b is the transpose of a; print the (i, j) position of
  ! every mismatching element of b.
  subroutine check01(a, b, N)
    implicit none
    integer, intent(in) :: N
    double precision, intent(in) :: a(N, N), b(N, N)
    integer :: i, j

    do j = 1, N
      do i = 1, N
        ! Exact comparison is intended: values are copied, not computed.
        if ( b(i, j) /= a(j, i) ) then
          print *, "Error: ", i, j
        endif
      enddo
    enddo
  end subroutine check01

end program test01
Overwriting test01.f90
%%bash
# Build test01 against the PAPI 5.5.1.0 module.
module load papi papi-devel
# -I is needed so the Fortran 'include' statement finds f90papi.h.
# -lpapi goes AFTER the source file: the linker resolves libraries left to
# right, so a library listed before the code that uses it can be dropped
# (static libpapi.a, or shared with --as-needed).
gfortran -o test01 test01.f90 \
-I /opt/bullxde/perftools/papi/5.5.1.0/include \
-lpapi
%%bash
module load papi papi-devel
./test01
PAPI_LD_INS 1838017
PAPI_SR_INS 525961
PAPI_L3_TCM 1379
N=512, T= 0.001251 s, Rate= 399.7 MB/s