diff --git a/conf.py b/conf.py
index 8b09ed50..9ef7c04e 100644
--- a/conf.py
+++ b/conf.py
@@ -116,4 +116,8 @@ def setup(app):
rst_prolog = r"""
.. |R| replace:: \ :sup:`®`
+
+.. |br| raw:: html
+
+   <br />
"""
diff --git a/images/scorep_error.png b/images/scorep_error.png
new file mode 100644
index 00000000..49aadbac
Binary files /dev/null and b/images/scorep_error.png differ
diff --git a/images/scorep_filter_functions.png b/images/scorep_filter_functions.png
new file mode 100644
index 00000000..6b86ae5e
Binary files /dev/null and b/images/scorep_filter_functions.png differ
diff --git a/images/scorep_filtering.png b/images/scorep_filtering.png
new file mode 100644
index 00000000..09948e00
Binary files /dev/null and b/images/scorep_filtering.png differ
diff --git a/images/scorep_filtering_results.png b/images/scorep_filtering_results.png
new file mode 100644
index 00000000..fe17f84e
Binary files /dev/null and b/images/scorep_filtering_results.png differ
diff --git a/images/vampir_call_tree.png b/images/vampir_call_tree.png
new file mode 100644
index 00000000..bdac64c6
Binary files /dev/null and b/images/vampir_call_tree.png differ
diff --git a/images/vampir_communication_matrix.png b/images/vampir_communication_matrix.png
new file mode 100644
index 00000000..e505cb2d
Binary files /dev/null and b/images/vampir_communication_matrix.png differ
diff --git a/images/vampir_communication_matrix_max_message.png b/images/vampir_communication_matrix_max_message.png
new file mode 100644
index 00000000..9a61ab05
Binary files /dev/null and b/images/vampir_communication_matrix_max_message.png differ
diff --git a/images/vampir_communication_matrix_max_time.png b/images/vampir_communication_matrix_max_time.png
new file mode 100644
index 00000000..07dfce39
Binary files /dev/null and b/images/vampir_communication_matrix_max_time.png differ
diff --git a/images/vampir_communication_matrix_min_data_rate.png b/images/vampir_communication_matrix_min_data_rate.png
new file mode 100644
index 00000000..325c34ac
Binary files /dev/null and b/images/vampir_communication_matrix_min_data_rate.png differ
diff --git a/images/vampir_counter_data_timeline.png b/images/vampir_counter_data_timeline.png
new file mode 100644
index 00000000..707bb3f0
Binary files /dev/null and b/images/vampir_counter_data_timeline.png differ
diff --git a/images/vampir_counter_message_data_rates.png b/images/vampir_counter_message_data_rates.png
new file mode 100644
index 00000000..aa30d914
Binary files /dev/null and b/images/vampir_counter_message_data_rates.png differ
diff --git a/images/vampir_counter_mpi_latencies.png b/images/vampir_counter_mpi_latencies.png
new file mode 100644
index 00000000..b1d17215
Binary files /dev/null and b/images/vampir_counter_mpi_latencies.png differ
diff --git a/images/vampir_counter_select_metric.png b/images/vampir_counter_select_metric.png
new file mode 100644
index 00000000..7372bf95
Binary files /dev/null and b/images/vampir_counter_select_metric.png differ
diff --git a/images/vampir_io_summary.png b/images/vampir_io_summary.png
new file mode 100644
index 00000000..1821eaca
Binary files /dev/null and b/images/vampir_io_summary.png differ
diff --git a/images/vampir_io_summary_operations.png b/images/vampir_io_summary_operations.png
new file mode 100644
index 00000000..abf6795d
Binary files /dev/null and b/images/vampir_io_summary_operations.png differ
diff --git a/images/vampir_io_timeline.png b/images/vampir_io_timeline.png
new file mode 100644
index 00000000..cfe94589
Binary files /dev/null and b/images/vampir_io_timeline.png differ
diff --git a/images/vampir_main_view.png b/images/vampir_main_view.png
new file mode 100644
index 00000000..1fe38754
Binary files /dev/null and b/images/vampir_main_view.png differ
diff --git a/images/vampir_main_view2.png b/images/vampir_main_view2.png
new file mode 100644
index 00000000..a4ec51da
Binary files /dev/null and b/images/vampir_main_view2.png differ
diff --git a/images/vampir_message_summary.png b/images/vampir_message_summary.png
new file mode 100644
index 00000000..23645b9c
Binary files /dev/null and b/images/vampir_message_summary.png differ
diff --git a/images/vampir_message_summary_menu.png b/images/vampir_message_summary_menu.png
new file mode 100644
index 00000000..a51db375
Binary files /dev/null and b/images/vampir_message_summary_menu.png differ
diff --git a/images/vampir_performance_radar.png b/images/vampir_performance_radar.png
new file mode 100644
index 00000000..e0935675
Binary files /dev/null and b/images/vampir_performance_radar.png differ
diff --git a/images/vampir_performance_radar_data_rate.png b/images/vampir_performance_radar_data_rate.png
new file mode 100644
index 00000000..78137810
Binary files /dev/null and b/images/vampir_performance_radar_data_rate.png differ
diff --git a/images/vampir_performance_radar_menu.png b/images/vampir_performance_radar_menu.png
new file mode 100644
index 00000000..57a9c998
Binary files /dev/null and b/images/vampir_performance_radar_menu.png differ
diff --git a/images/vampir_process_summary.png b/images/vampir_process_summary.png
new file mode 100644
index 00000000..f2001b82
Binary files /dev/null and b/images/vampir_process_summary.png differ
diff --git a/images/vampir_process_summary_menu.png b/images/vampir_process_summary_menu.png
new file mode 100644
index 00000000..6ce7f827
Binary files /dev/null and b/images/vampir_process_summary_menu.png differ
diff --git a/images/vampir_process_summary_timeline.png b/images/vampir_process_summary_timeline.png
new file mode 100644
index 00000000..7170a3c0
Binary files /dev/null and b/images/vampir_process_summary_timeline.png differ
diff --git a/images/vampir_process_timeline.png b/images/vampir_process_timeline.png
new file mode 100644
index 00000000..7b83110e
Binary files /dev/null and b/images/vampir_process_timeline.png differ
diff --git a/images/vampir_process_timeline_exclusive.png b/images/vampir_process_timeline_exclusive.png
new file mode 100644
index 00000000..dc86cb23
Binary files /dev/null and b/images/vampir_process_timeline_exclusive.png differ
diff --git a/images/vampir_prrocess_sumamry_2_clusters.png b/images/vampir_prrocess_sumamry_2_clusters.png
new file mode 100644
index 00000000..b2f0d3b9
Binary files /dev/null and b/images/vampir_prrocess_sumamry_2_clusters.png differ
diff --git a/images/vampir_set_cluster.png b/images/vampir_set_cluster.png
new file mode 100644
index 00000000..643aa62a
Binary files /dev/null and b/images/vampir_set_cluster.png differ
diff --git a/images/vampir_zoom.png b/images/vampir_zoom.png
new file mode 100644
index 00000000..d866d340
Binary files /dev/null and b/images/vampir_zoom.png differ
diff --git a/software/profiling/Score-P.rst b/software/profiling/Score-P.rst
new file mode 100644
index 00000000..1b1217ad
--- /dev/null
+++ b/software/profiling/Score-P.rst
@@ -0,0 +1,979 @@
+.. _scorep:
+
+*******
+Score-P
+*******
+
+The Score-P (Scalable Performance Measurement Infrastructure for Parallel
+Codes) instrumenting tool is a scalable and easy-to-use tool suite for
+profiling, event tracing, and online analysis of HPC applications. It has been
+created in the German BMBF project SILC and the US DOE project PRIMA. Score-P
+is developed under a BSD 3-Clause License and governed by a meritocratic
+governance model.
+
+| Website: https://www.vi-hps.org/projects/score-p/
+| Email: support@score-p.org
+
+Score-P is installed with `Program Database Toolkit (PDT)
+<https://www.cs.uoregon.edu/research/pdt/home.php>`_ on Summit. PDT is a
+framework for analyzing source code written in several programming languages.
+Moreover, `Performance Application Programming Interface (PAPI)
+<https://icl.utk.edu/papi/>`_ is supported. PAPI counters are used to assess
+CPU performance. In this section, some approaches for profiling and tracing
+will be presented.
+
+Automatic Source Code Instrumentation
+=====================================
+
+Prefix method
+~~~~~~~~~~~~~
+
+In this approach, we edit the Makefile and prefix the compiler commands with ``scorep``, either in the variable definitions or directly in the build rule.
+
+.. code::
+
+ CC = scorep gcc
+ CXX = scorep g++
+ F90 = scorep gfortran
+
+.. code::
+
+ CC = gcc
+ ..
+ target: target.c
+ scorep $(CC) -o $@ $^
+
+
+Wrapper method
+~~~~~~~~~~~~~~
+
+In this approach we do not need to edit any file, because the Score-P compiler wrappers are passed directly to CMake or ``configure``. Sometimes only one of the two methods (prefix or wrapper) works.
+
+.. code::
+
+ SCOREP_WRAPPER=off cmake -DCMAKE_C_COMPILER=scorep-gcc -DCMAKE_CXX_COMPILER=scorep-g++
+
+.. code::
+
+ SCOREP_WRAPPER=off ../configure CC=scorep-gcc CXX=scorep-g++ --disable-dependency-tracking
+
+.. code::
+
+ make SCOREP_WRAPPER_INSTRUMENTER_FLAGS=
+
+
+
+Instrumentation Overview
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+The following Score-P instrumenter options are useful.
+
++-------------------------+------------------------+----------------+-----------------------------+
+| Type of Instrumentation | Instrumenter Switch | Default value | Instrumented routines |
++=========================+========================+================+=============================+
+| MPI | | ``--mpp=mpi`` | (auto) | configured by install |
+| | | ``--mpp=none`` | | |
++-------------------------+------------------------+----------------+-----------------------------+
+| SHMEM | | ``--mpp=shmem`` | (auto) | configured by install |
+| | | ``--mpp=none`` | | |
++-------------------------+------------------------+----------------+-----------------------------+
+| OpenCL | | ``--opencl`` | enabled | configured by install |
+| | | ``--noopencl`` | | |
++-------------------------+------------------------+----------------+-----------------------------+
+| OpenACC | | ``--openacc`` | enabled | configured by install |
+| | | ``--noopenacc`` | | |
++-------------------------+------------------------+----------------+-----------------------------+
+| CUDA | | ``--cuda`` | enabled | configured by install |
+| | | ``--nocuda`` | | |
++-------------------------+------------------------+----------------+-----------------------------+
+| OpenMP | | ``--thread=omp``/ | (auto) | all parallel constructs |
+| | | ``--thread=none`` | | |
+| | | ``--openmp`` | | |
+| | | ``--noopenmp`` | | |
++-------------------------+------------------------+----------------+-----------------------------+
+| Pthread | | ``--thread=pthread`` | (auto) | basic Pthread library calls |
++-------------------------+------------------------+----------------+-----------------------------+
+| Compiler | | ``--compiler`` | enabled | all |
+| | | ``--nocompiler`` | | |
++-------------------------+------------------------+----------------+-----------------------------+
+| PDT | | ``--pdt`` | disabled | all |
+| | | ``--nopdt`` | | |
++-------------------------+------------------------+----------------+-----------------------------+
+| POMP2 | | ``--pomp`` | disabled | manually annotated |
+| | | ``--nopomp`` | | |
++-------------------------+------------------------+----------------+-----------------------------+
+| Manual | | ``--user`` | disabled | manually annotated |
+| | | ``--nouser`` | | |
++-------------------------+------------------------+----------------+-----------------------------+
+
+
+
+Run-Time Environment Variables
+==============================
+
+The following Score-P environment variables may be useful in job submission scripts. See the Score-P manual for more information.
+
++---------------------------------------+----------------------------------+-------------------------------------------------------------------------------------------------------------+
+| Variable | Default | Description |
++=======================================+==================================+=============================================================================================================+
+| SCOREP_ENABLE_PROFILING | TRUE | Enable profiling |
++---------------------------------------+----------------------------------+-------------------------------------------------------------------------------------------------------------+
+| SCOREP_ENABLE_TRACING | FALSE | Enable tracing |
++---------------------------------------+----------------------------------+-------------------------------------------------------------------------------------------------------------+
+| SCOREP_VERBOSE | FALSE | Activate verbose mode |
++---------------------------------------+----------------------------------+-------------------------------------------------------------------------------------------------------------+
+| SCOREP_TOTAL_MEMORY | 16000k | Total memory in bytes per process to be consumed by the measurement system |
++---------------------------------------+----------------------------------+-------------------------------------------------------------------------------------------------------------+
+| SCOREP_EXPERIMENT_DIRECTORY | directory based on current time | Declare the path with the directory for the data to be saved |
++---------------------------------------+----------------------------------+-------------------------------------------------------------------------------------------------------------+
+| SCOREP_OVERWRITE_EXPERIMENT_DIRECTORY | TRUE | Overwrite an existing experiment directory |
++---------------------------------------+----------------------------------+-------------------------------------------------------------------------------------------------------------+
+| SCOREP_EXECUTABLE | "" | Full path to the executable if Score-P cannot find it |
++---------------------------------------+----------------------------------+-------------------------------------------------------------------------------------------------------------+
+| SCOREP_PROFILING_MAX_CALLPATH_DEPTH | 30 | Maximum depth of the calltree |
++---------------------------------------+----------------------------------+-------------------------------------------------------------------------------------------------------------+
+| SCOREP_FILTERING_FILE | "" | A filename with the filter rules |
++---------------------------------------+----------------------------------+-------------------------------------------------------------------------------------------------------------+
+| SCOREP_METRIC_PAPI | "" | PAPI metric names to measure |
++---------------------------------------+----------------------------------+-------------------------------------------------------------------------------------------------------------+
+| SCOREP_METRIC_PAPI_PER_PROCESS | "" | List of requested PAPI metric names that will be recorded only by first thread of a process |
++---------------------------------------+----------------------------------+----------------+--------------------------------------------------------------------------------------------+
+| SCOREP_MPI_ENABLE_GROUPS | default | **Value** | **Description** |
+| | +----------------+--------------------------------------------------------------------------------------------+
+| | | ``all`` | All MPI functions |
+| | +----------------+--------------------------------------------------------------------------------------------+
+| | | ``cg`` | Communication and group management |
+| | +----------------+--------------------------------------------------------------------------------------------+
+| | | ``coll`` | Collective functions |
+| | +----------------+--------------------------------------------------------------------------------------------+
+| | | ``default`` | Includes cg, coll, env, io, p2p, rma, topo, xnonblock |
+| | +----------------+--------------------------------------------------------------------------------------------+
+| | | ``env`` | Environmental management |
+| | +----------------+--------------------------------------------------------------------------------------------+
+| | | ``err`` | MPI Error handling |
+| | +----------------+--------------------------------------------------------------------------------------------+
+| | | ``ext`` | External interface functions |
+| | +----------------+--------------------------------------------------------------------------------------------+
+| | | ``io`` | MPI file I/O |
+| | +----------------+--------------------------------------------------------------------------------------------+
+| | | ``p2p`` | Peer-to-peer communication |
+| | +----------------+--------------------------------------------------------------------------------------------+
+| | | ``misc`` | Miscellaneous |
+| | +----------------+--------------------------------------------------------------------------------------------+
+| | | ``perf`` | PControl |
+| | +----------------+--------------------------------------------------------------------------------------------+
+| | | ``rma`` | One sided communication |
+| | +----------------+--------------------------------------------------------------------------------------------+
+| | | ``spawn`` | Process management |
+| | +----------------+--------------------------------------------------------------------------------------------+
+| | | ``topo`` | Topology |
+| | +----------------+--------------------------------------------------------------------------------------------+
+| | | ``type`` | MPI datatype functions |
+| | +----------------+--------------------------------------------------------------------------------------------+
+| | | ``xnonblock`` | Extended non-blocking events |
+| | +----------------+--------------------------------------------------------------------------------------------+
+| | | ``xreqtest`` | Test events for uncompleted requests |
+| | +----------------+--------------------------------------------------------------------------------------------+
+| | | ``none/no`` | Disable feature |
++---------------------------------------+----------------------------------+----------------+--------------------------------------------------------------------------------------------+
+| SCOREP_MPI_MEMORY_RECORDING | FALSE |Enable tracing of memory allocations done by calls to MPI_ALLOC_MEM and MPI_FREE_MEM, requires the MISC group|
++---------------------------------------+----------------------------------+-------------------------------------------------------------------------------------------------------------+
+| SCOREP_MPI_ONLINE_ANALYSIS | FALSE | Enable online MPI wait states analysis |
++---------------------------------------+----------------------------------+---------------------+---------------------------------------------------------------------------------------+
+| SCOREP_CUDA_ENABLE | no | **Value** | **Description** |
+| | +---------------------+---------------------------------------------------------------------------------------+
+| | | ``runtime`` | CUDA runtime API |
+| | +---------------------+---------------------------------------------------------------------------------------+
+| | | ``driver`` | CUDA driver API |
+| | +---------------------+---------------------------------------------------------------------------------------+
+| | | ``kernel`` | CUDA kernels |
+| | +---------------------+---------------------------------------------------------------------------------------+
+| | | ``kernel_serial`` | Serialized kernel recording |
+| | +---------------------+---------------------------------------------------------------------------------------+
+| | | ``kernel_counter`` | Fixed CUDA kernel metrics |
+| | +---------------------+---------------------------------------------------------------------------------------+
+| | | ``memcpy`` | CUDA memory copies |
+| | +---------------------+---------------------------------------------------------------------------------------+
+| | | ``sync`` | Record implicit and explicit CUDA synchronization |
+| | +---------------------+---------------------------------------------------------------------------------------+
+| | | ``idle`` | GPU compute idle time |
+| | +---------------------+---------------------------------------------------------------------------------------+
+| | | ``pure_idle`` | GPU idle time (memory copies are not idle) |
+| | +---------------------+---------------------------------------------------------------------------------------+
+| | | ``gpumemusage`` | Record CUDA memory (de)allocations as a counter |
+| | +---------------------+---------------------------------------------------------------------------------------+
+| | | ``references`` | Record references between CUDA activities |
+| | +---------------------+---------------------------------------------------------------------------------------+
+| | | ``flushatexit`` | Flush CUDA activity buffer at program exit |
+| | +---------------------+---------------------------------------------------------------------------------------+
+| | | ``default/yes/1`` | Includes runtime, kernel, memcpy |
+| | +---------------------+---------------------------------------------------------------------------------------+
+| | | ``none/no`` | Disable feature |
++---------------------------------------+----------------------------------+---------------------+---------------------------------------------------------------------------------------+
+| SCOREP_CUDA_BUFFER | 1M | Total memory in bytes for the CUDA record buffer |
++---------------------------------------+----------------------------------+-----------------------+-------------------------------------------------------------------------------------+
+| SCOREP_OPENACC_ENABLE | no | **Value** | **Description** |
+| | +-----------------------+-------------------------------------------------------------------------------------+
+| | | ``regions`` | OpenACC regions |
+| | +-----------------------+-------------------------------------------------------------------------------------+
+| | | ``wait`` | OpenACC wait operations |
+| | +-----------------------+-------------------------------------------------------------------------------------+
+| | | ``enqueue`` | OpenACC enqueue operations |
+| | +-----------------------+-------------------------------------------------------------------------------------+
+| | | ``device_alloc`` | OpenACC device memory allocations |
+| | +-----------------------+-------------------------------------------------------------------------------------+
+| | | ``kernel_properties`` | Record kernel properties such as the kernel name, gang, worker and vector size |
+| | +-----------------------+-------------------------------------------------------------------------------------+
+| | | ``variable_names`` | Record variable names for OpenACC data allocation and enqueue upload/download |
+| | +-----------------------+-------------------------------------------------------------------------------------+
+| | | ``default/yes/1`` | OpenACC regions,enqueue and wait operations |
+| | +-----------------------+-------------------------------------------------------------------------------------+
+| | | ``none/no`` | Disable feature |
++---------------------------------------+----------------------------------+-----------------------+-------------------------------------------------------------------------------------+
+| SCOREP_MEMORY_RECORDING | FALSE | Memory (de)allocations are recorded via libc/C++ API |
++---------------------------------------+----------------------------------+-------------------------------------------------------------------------------------------------------------+
+
+Example Application: MiniWeather
+================================
+
+We'll use the open-source `MiniWeather
+<https://github.com/mrnorman/miniWeather>`_ application to demonstrate the
+capabilities of Score-P.
+
+Get the Source Code
+~~~~~~~~~~~~~~~~~~~
+
+.. code::
+
+ $ git clone https://github.com/mrnorman/miniWeather.git
+ $ cd miniWeather/c/build
+
+Compile the Application
+~~~~~~~~~~~~~~~~~~~~~~~
+
+MiniWeather supports several build modes: serial, MPI, MPI+OpenMP, and
+MPI+OpenACC. In order to compile the application, we'll be using the PGI
+toolchain, and bring into our environment both ``cmake`` and a parallel
+installation of ``NetCDF``.
+
+.. code::
+
+ $ module load pgi parallel-netcdf cmake
+ $ ./cmake_summit_pgi.sh
+
+
+After the compilation finishes, the following executables are available: ``serial``, ``openacc``, ``mpi``, and ``openmp``.
+
+Below, we'll look at using Score-P to profile each case.
+
+
+Modifications
+-------------
+
+- Edit the makefile and replace ``mpic++`` with ``scorep --mpp=mpi mpic++``.
+
+
+Instrumenting the Serial Version of MiniWeather
+-----------------------------------------------
+
+For a serial application, we should not use a Makefile with a programming
+model such as MPI or OpenMP. However, as the source code for this **specific**
+case includes MPI headers that are not excluded during the compilation of the
+serial version, we should declare a Makefile with MPI.
+
+- Edit ``cmake_summit_pgi.sh`` and replace
+
+.. code::
+
+ cmake -DCMAKE_CXX_COMPILER=mpicxx
+
+with
+
+.. code::
+
+ SCOREP_WRAPPER=off cmake -DCMAKE_CXX_COMPILER=scorep-mpicxx
+
+
+and execute
+
+.. code::
+
+ $ module load pgi
+ $ module load parallel-netcdf
+ $ module load scorep/6.0
+ $ make serial SCOREP_WRAPPER_INSTRUMENTER_FLAGS="--mpp=mpi"
+
+If there were no MPI headers, you should edit the `cmake_summit_pgi.sh` with:
+
+.. code::
+
+ cmake -DCMAKE_CXX_COMPILER=scorep-pgc++
+
+and execute:
+
+.. code::
+
+ make serial
+
+If you want to add PDT, then use the option ``--pdt`` in the variable ``SCOREP_WRAPPER_INSTRUMENTER_FLAGS``
+
+Add to your submission script the Score-P variables that you want to use (or
+uncomment them below). By default, Score-P enables profiling and disables tracing.
+
+.. code::
+
+ #PAPI metrics
+ export SCOREP_METRIC_PAPI=PAPI_TOT_INS,PAPI_TOT_CYC,PAPI_FP_OPS
+
+ export SCOREP_MPI_ENABLE_GROUPS=ALL
+ export SCOREP_TOTAL_MEMORY=20MB
+
+ time jsrun -n 1 -r 1 -a 1 -c 1 ./serial
+
+
+- When the execution finishes, one directory is created named ``scorep-_