Example 1.4 MPI_Pack and MPI_Unpack Usage

MPI_SUM, MPI_MAXLOC, and MPI_MINLOC — among the pre-defined reduction operations in MPI — are used in this example to compute the sum of the partial integrals produced by all participating processes. The ranks of the processes that produce the maximum and minimum partial integrals are also returned. To give the "user" more control over this integration program, the "master" process reads in the number of increments each process uses and the lower and upper limits of integration. These parameters are packed via MPI_Pack before being broadcast to all processes, minimizing communication overhead. Once a "worker" process receives the packed data, it must unpack it (via MPI_Unpack) to recover the actual values.

      Program Example1_4
c#######################################################################
c#
c# This is an MPI example on parallel integration
c# It demonstrates the use of :
c#
c# * MPI_Init
c# * MPI_Comm_rank
c# * MPI_Comm_size
c# * MPI_Pack
c# * MPI_Unpack
c# * MPI_Reduce
c# * MPI_SUM, MPI_MAXLOC, and MPI_MINLOC
c# * MPI_Finalize
c#
c# Dr. Kadin Tseng
c# Scientific Computing and Visualization
c# Boston University
c# 1998
c#
c#######################################################################
      implicit none
      integer n, p, ierr, m, master
      real h, result, a, b, integral, pi

      include "mpif.h"  !! This brings in pre-defined MPI constants, ...
      integer Iam, dest
c**element 1 = value, element 2 = rank; this pair layout is what the
c**MPI_2REAL type expects for MPI_MAXLOC/MPI_MINLOC reductions
      real my_result(2), min_result(2), max_result(2)
      integer Nbytes
      parameter (Nbytes=1000, master=0)
      character scratch(Nbytes)   !! needed for MPI_pack/MPI_unpack; counted in bytes
      integer index, minid, maxid

c**Starts MPI processes ...

      call MPI_Init(ierr)                               !! starts MPI
      call MPI_Comm_rank(MPI_COMM_WORLD, Iam, ierr)     !! get current process id
      call MPI_Comm_size(MPI_COMM_WORLD, p, ierr)       !! get number of processes

      pi = acos(-1.0)   !!  = 3.14159...

      dest = master     !! the process that computes the final result

      if(Iam .eq. master) then
        print *,'The requested number of processors =',p
        print *,'enter number of increments within each process'
        read(*,*)n
        print *,'enter a & m'
        print *,' a = lower limit of integration'
        print *,' b = upper limit of integration'
        print *,'   = m * pi/2'
        read(*,*)a,m

        b = m * pi / 2.

c**to be efficient, pack all things into a buffer for broadcast
c**NOTE: the pack/unpack position argument is a byte offset and must
c**start at 0 (not 1) per the MPI standard; MPI_Pack advances it
        index = 0
        call MPI_Pack(n, 1, MPI_INTEGER, scratch, Nbytes, index,
     &                MPI_COMM_WORLD, ierr)
        call MPI_Pack(a, 1, MPI_REAL, scratch, Nbytes, index,
     &                MPI_COMM_WORLD, ierr)
        call MPI_Pack(b, 1, MPI_REAL, scratch, Nbytes, index,
     &                MPI_COMM_WORLD, ierr)
        call MPI_Bcast(scratch, Nbytes, MPI_PACKED, master,
     &                 MPI_COMM_WORLD, ierr)

      else

        call MPI_Bcast(scratch, Nbytes, MPI_PACKED, master,
     &                 MPI_COMM_WORLD, ierr)
c**things received have been packed, unpack into expected locations
c**(same starting offset and same order as the packing above)
        index = 0
        call MPI_Unpack(scratch, Nbytes, index, n, 1, MPI_INTEGER,
     &                MPI_COMM_WORLD, ierr)
        call MPI_Unpack(scratch, Nbytes, index, a, 1, MPI_REAL,
     &                MPI_COMM_WORLD, ierr)
        call MPI_Unpack(scratch, Nbytes, index, b, 1, MPI_REAL,
     &                MPI_COMM_WORLD, ierr)

      endif


      h = (b-a)/n/p     !! increment length; each process sums n of n*p

      my_result(1) = integral(a,Iam,h,n)   !! partial integral on this rank
      my_result(2) = Iam                   !! rank tag for MAXLOC/MINLOC

      write(*,"('Process ',i2,' has the partial result of',f10.6)")
     &          Iam,my_result(1)

c**total integral: count=1 reduces only my_result(1)
      call MPI_Reduce(my_result, result, 1, MPI_REAL, MPI_SUM, dest,
     &                  MPI_COMM_WORLD, ierr)  !! data reduction by way of MPI_SUM

c**smallest partial integral plus the rank that produced it
      call MPI_Reduce(my_result, min_result, 1, MPI_2REAL, MPI_MINLOC,
     &                dest, MPI_COMM_WORLD, ierr)  !! data reduction by way of MPI_MINLOC

c**largest partial integral plus the rank that produced it
      call MPI_Reduce(my_result, max_result, 1, MPI_2REAL, MPI_MAXLOC,
     &                dest, MPI_COMM_WORLD, ierr)  !! data reduction by way of MPI_MAXLOC

      if(Iam .eq. master) then
        print *,'The result =',result
        maxid = max_result(2)    !! rank comes back as a real; convert
        print *,'Proc',maxid,' has  largest integrated value of',
     &     max_result(1)
        minid = min_result(2)
        print *,'Proc',minid,' has smallest integrated value of',
     &     min_result(1)
      endif

      call MPI_Finalize(ierr)                           !! let MPI finish up ...

      stop
      end
      real function integral(a,i,h,n)
c**Midpoint-rule approximation of the integral of cos(x) over the
c**sub-interval owned by process i.
c**
c**  a : global lower limit of integration
c**  i : process rank; selects which chunk of increments to sum
c**  h : width of a single increment
c**  n : number of increments handled by this process
c**
c**Returns the sum over k = 0..n-1 of cos(midpoint)*h, where the
c**increments covered are those numbered i*n .. i*n+n-1 globally.
      implicit none
      integer n, i, k
      real h, half, xmid, a

      half = h/2.                  !! left edge -> midpoint offset
      integral = 0.0               !! running sum of increment areas
      do k = 0, n-1                !! one increment per iteration
        xmid = a + (i*n + k)*h + half
        integral = integral + cos(xmid)*h
      enddo

      return
      end