一个适用于 TI ARM 与 DSP 双核平台的 makefile。该 makefile 演示了如何同时编译出 ARM 平台和 DSP 平台的可执行文件,以便比较 ARM 核与 DSP 核在执行运算时表现出的性能差异。
makefile
#############################################################################
# Makefile #
# #
# Builds the emqbit benchmark source for ARM and DSP #
#############################################################################
#
#
#############################################################################
# #
# Copyright (C) 2010 Texas Instruments Incorporated #
# http://www.ti.com/ #
# #
#############################################################################
#
#
#############################################################################
# #
# Redistribution and use in source and binary forms, with or without #
# modification, are permitted provided that the following conditions #
# are met: #
# #
# Redistributions of source code must retain the above copyright #
# notice, this list of conditions and the following disclaimer. #
# #
# Redistributions in binary form must reproduce the above copyright #
# notice, this list of conditions and the following disclaimer in the #
# documentation and/or other materials provided with the #
# distribution. #
# #
# Neither the name of Texas Instruments Incorporated nor the names of #
# its contributors may be used to endorse or promote products derived #
# from this software without specific prior written permission. #
# #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS #
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT #
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR #
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT #
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, #
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT #
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, #
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY #
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT #
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE #
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #
# #
#############################################################################
# Base name of the benchmark: the source file is $(PROJNAME).c and the linked
# outputs are $(PROJNAME)_gpp (ARM) and $(PROJNAME)_dsp (DSP).
PROJNAME := matmult
# ----------------------------------------------------------------------------
# Name of the ARM GCC cross compiler
# (if ARM_TOOLCHAIN_PATH is provided, use <path>/bin, otherwise assume the
# tools are in the PATH)
# ----------------------------------------------------------------------------
ARM_TOOLCHAIN_PREFIX ?= arm-none-linux-gnueabi-
ifdef ARM_TOOLCHAIN_PATH
ARM_CC := $(ARM_TOOLCHAIN_PATH)/bin/$(ARM_TOOLCHAIN_PREFIX)gcc
else
ARM_CC := $(ARM_TOOLCHAIN_PREFIX)gcc
endif
# Pick up any ARM compiler and linker flags from the environment
ARM_CFLAGS = $(CFLAGS)
# The flag list spans several lines, so every line except the last must end
# with a backslash continuation. -c lives here because ARM_CFLAGS is only used
# by the compile rule, never by the link step.
ARM_CFLAGS += -std=gnu99 \
              -Wdeclaration-after-statement -Wall -Wno-trigraphs \
              -fno-strict-aliasing -fno-common -fno-omit-frame-pointer \
              -c -O3
ARM_LDFLAGS = $(LDFLAGS)
# The benchmark links against the math library.
ARM_LDFLAGS += -lm
# ----------------------------------------------------------------------------
# Name of the DSP compiler
# TI C6RunApp Frontend (if path variable provided, use it, otherwise assume
# the tools are in the path)
# ----------------------------------------------------------------------------
# ?= keeps the default but lets the environment override the prefix, matching
# how ARM_TOOLCHAIN_PREFIX is handled.
C6RUN_TOOLCHAIN_PREFIX ?= c6runapp-
ifdef C6RUN_TOOLCHAIN_PATH
C6RUN_CC := $(C6RUN_TOOLCHAIN_PATH)/bin/$(C6RUN_TOOLCHAIN_PREFIX)cc
else
C6RUN_CC := $(C6RUN_TOOLCHAIN_PREFIX)cc
endif
# Root of the C64x+ DSPLIB installation. It is used both as an include path
# and as the location of dsplib64plus.lib at link time. Override it (from the
# environment or the command line) when DSPLIB is installed elsewhere.
DSPLIB_PATH ?= $(HOME)/C64x+DSPLIB/dsplib_v210
# -DUSE_DSPLIB selects the DSP_mat_mul code path in the C source.
C6RUN_CFLAGS = -c -O3 -I$(DSPLIB_PATH) -DUSE_DSPLIB
C6RUN_LDFLAGS =
# ----------------------------------------------------------------------------
# Source files, plus the object files derived from them for each platform
# (ARM objects go under gpp/, DSP objects under dsp/)
# ----------------------------------------------------------------------------
SRCS := $(PROJNAME).c
ARM_OBJS := $(patsubst %.c,gpp/%.o,$(SRCS))
DSP_OBJS := $(patsubst %.c,dsp/%.o,$(SRCS))
# ----------------------------------------------------------------------------
# Makefile targets
# ----------------------------------------------------------------------------
.PHONY : dsp dsp_clean gpp gpp_clean all clean

# Remove a target whose recipe failed so a partial file never looks up to date.
.DELETE_ON_ERROR:

# Default goal: build both the DSP and the ARM executable.
all: dsp gpp

clean: dsp_clean gpp_clean

# --- ARM (GPP) build --------------------------------------------------------
# 'gpp' is only an alias. The executable is a real file target, so it is
# relinked only when one of its objects changes.
gpp: $(PROJNAME)_gpp

# Linker flags (which carry -lm) follow the objects so that libraries are
# searched after the objects that reference them.
$(PROJNAME)_gpp: $(ARM_OBJS)
	$(ARM_CC) -o $@ $(ARM_OBJS) $(ARM_LDFLAGS)

# Order-only prerequisite: the output directory must exist before compiling
# (including under make -j), but the stamp's timestamp never forces a rebuild.
gpp/%.o : %.c | gpp/.created
	$(ARM_CC) $(ARM_CFLAGS) $(CINCLUDES) -o $@ $<

# Stamp file marking that the gpp/ output directory has been created.
gpp/.created:
	@mkdir -p gpp
	@touch gpp/.created

gpp_clean:
	@rm -Rf $(PROJNAME)_gpp
	@rm -Rf gpp

# --- DSP build --------------------------------------------------------------
# 'dsp' is only an alias for the DSP executable, mirroring 'gpp' above.
dsp: $(PROJNAME)_dsp

# The DSPLIB archive is passed after the objects on the link line.
$(PROJNAME)_dsp: $(DSP_OBJS)
	$(C6RUN_CC) $(C6RUN_LDFLAGS) -o $@ $(DSP_OBJS) $(DSPLIB_PATH)/dsplib64plus.lib

# Same order-only directory dependency as the ARM objects.
dsp/%.o : %.c | dsp/.created
	$(C6RUN_CC) $(C6RUN_CFLAGS) $(CINCLUDES) -o $@ $<

# Stamp file marking that the dsp/ output directory has been created.
dsp/.created:
	@mkdir -p dsp
	@touch dsp/.created

dsp_clean:
	@rm -Rf $(PROJNAME)_dsp
	@rm -Rf dsp
源文件:
matmult.c
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#ifdef USE_DSPLIB
#include "src/DSP_mat_mul/DSP_mat_mul.h"
#endif
// Fill a size x size matrix (stored row by row in mat) with pseudo-random
// values in the range 0..99. Values are drawn from rand() in row-major order,
// so the caller controls the sequence through srand().
void matfill(int size, short *mat)
{
    int row = 0;
    while (row < size)
    {
        short *dst = mat + size*row;   // start of the current row
        int col;
        for (col = 0; col < size; ++col)
        {
            dst[col] = (short) (rand() % 100);
        }
        ++row;
    }
}
// Placeholder for a matrix printer: the body is currently a no-op, so nothing
// is printed and mat is left untouched.
void printmat(int size, short *mat)
{
    (void) size;   // unused
    (void) mat;    // unused
}
// Multiply two square size x size matrices of shorts stored row by row.
// When USE_DSPLIB is defined, the work is delegated to the fixed-point
// C64x+ DSPLib routine DSP_mat_mul. Otherwise a plain C triple loop computes
// each element as the int dot product of a row of A and a column of B,
// right-shifted by 'shift' before being stored into C.
void matmult(int size, int shift, short *A, short *B, short *C)
{
#ifdef USE_DSPLIB
    DSP_mat_mul(A, size, size, B, size, C, shift);
#else
    int row, col, idx;
    for (row = 0; row < size; ++row)
    {
        const short *lhs = A + row*size;   // current row of A
        short *out = C + row*size;         // matching row of the result
        for (col = 0; col < size; ++col)
        {
            int acc = 0;
            for (idx = 0; idx < size; ++idx)
            {
                acc += lhs[idx] * B[idx*size + col];
            }
            out[col] = acc >> shift;
        }
    }
#endif
}
// Benchmark driver (square matrices only).
// Usage: <program> <matsize>
// Fills two matsize x matsize matrices with random values, multiplies them
// with matmult() and prints the elapsed wall-clock time in whole seconds.
// Exits with status -1 when the size argument is missing, not a positive
// number, too large to index with int arithmetic, or when allocation fails.
int main(int argc, char *argv[])
{
    short *A, *B, *C;
    int matsize, shift;
    size_t count;
    time_t t;

    if (argc != 2) exit(-1);
    matsize = atoi(argv[1]);
    // atoi yields 0 for non-numeric input; zero and negative sizes are rejected.
    if (matsize <= 0) exit(-1);
    // matmult() indexes elements with int arithmetic (i*size + k), so
    // matsize*matsize must fit in an int.
    if (matsize > INT_MAX / matsize) exit(-1);
    count = (size_t) matsize * (size_t) matsize;

    // shift = number of significant bits in matsize; matmult() right-shifts
    // each accumulated sum by this amount before storing it as a short.
    shift = 0;
    while ((matsize>>shift) > 0)
    {
        ++shift;
    }

    srand((unsigned) time(NULL));

    // Allocate space for matrices
    A = calloc(count, sizeof(short));
    B = calloc(count, sizeof(short));
    C = calloc(count, sizeof(short));
    if (A == NULL || B == NULL || C == NULL)
    {
        fprintf(stderr, "matrix allocation failed\n");
        free(A);
        free(B);
        free(C);
        exit(-1);
    }
    matfill(matsize, A);
    matfill(matsize, B);

    // Perform multiply and time it
    t = time(NULL);
    matmult(matsize, shift, A, B, C);
    t = time(NULL) - t;
    printf("time of multiply: %d seconds\n",(int)t);

    // Free allocated spaces
    free(A);
    free(B);
    free(C);
    return 0;
}
运行结果:
root@dm37x-evm:/opt# for i in 100 200 300 400 500 600; do ./matmult_gpp $i; done
time of multiply: 0 seconds
time of multiply: 0 seconds
time of multiply: 2 seconds
time of multiply: 6 seconds
time of multiply: 18 seconds
time of multiply: 39 seconds
root@dm37x-evm:/opt# for i in 100 200 300 400 500 600; do ./matmult_dsp $i; done
time of multiply: 0 seconds
time of multiply: 0 seconds
time of multiply: 0 seconds
time of multiply: 0 seconds
time of multiply: 1 seconds
time of multiply: 3 seconds
可见,当数据量较大时,DSP 的性能优势更加明显。