1 #-------------------------------------------------------------------------------
3 # Copyright 2010-2012 Duane Merrill
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 # For more information, see our Google Code project site:
18 # http://code.google.com/p/back40computing/
20 #-------------------------------------------------------------------------------
22 #-------------------------------------------------------------------------------
23 # Build script for project
24 #-------------------------------------------------------------------------------
# Host compiler flags: -O3 with loop unrolling, code generation for the
# "nocona" CPU family, and the write-strings / unused-result warnings turned
# off.  A later `CFLAGS =` assignment in this file replaces this value.
CFLAGS = -O3 \
         -funroll-loops \
         -march=nocona \
         -maccumulate-outgoing-args \
         -Wno-write-strings \
         -Wno-unused-result \
         -Wall
# Path of the CUDA compiler driver, looked up on PATH once at parse time.
# `:=` is used so that `which` is not re-run on every expansion of $(NVCC).
# The surrounding double quotes are part of the value, so a path containing
# spaces stays a single shell word wherever $(NVCC) is pasted into a command.
# stderr is discarded so a host without nvcc does not print lookup noise.
NVCC := "$(shell which nvcc 2>/dev/null)"
# Release number reported by `nvcc --version`: the text between "release "
# and the following comma.
# Evaluated once (`:=`): this variable is embedded in several target names, so
# a recursive definition would re-run the whole pipeline for each expansion.
# The compiler is invoked through $(NVCC) so that an overridden NVCC and the
# detected version always refer to the same binary.
NVCC_VERSION := $(strip $(shell $(NVCC) --version | grep release | sed -e 's/.*release //' -e 's/,.*//'))
# Upper-cased kernel name from `uname -s`; empty when uname is unavailable
# (its stderr is discarded).
# The character classes are quoted: unquoted, `[:lower:]` and `[:upper:]` are
# shell glob patterns and would be replaced by any matching single-character
# file name in the current directory.
# `:=` keeps the shell-out to a single execution.
OSUPPER := $(shell uname -s 2>/dev/null | tr '[:lower:]' '[:upper:]')
41 #-------------------------------------------------------------------------------
43 #-------------------------------------------------------------------------------
# nvcc -gencode flag sets, one per supported compute capability.
# gencode_for expands to the flag string for the two-digit capability given
# as $(1); each set embeds both SASS (sm_NN) and PTX (compute_NN).
gencode_for = -gencode=arch=compute_$(1),code=\"sm_$(1),compute_$(1)\"

GEN_SM30 := $(call gencode_for,30)
GEN_SM20 := $(call gencode_for,20)
GEN_SM13 := $(call gencode_for,13)
GEN_SM11 := $(call gencode_for,11)
GEN_SM10 := $(call gencode_for,10)
# Architectures compiled into the default GPU build (sm_30 only).
# Alternative multi-architecture selection, currently disabled:
#SM_TARGETS = $(GEN_SM20) $(GEN_SM13) $(GEN_SM10)
SM_TARGETS = ${GEN_SM30}
53 #-------------------------------------------------------------------------------
55 #-------------------------------------------------------------------------------
# Tuning architecture selector, copied from the lower-case `tunearch`
# variable (presumably supplied on the make command line).
TUNE_ARCH = ${tunearch}
# Map the requested tuning architecture onto its -gencode flag set.
# Recognised values are 300, 200, 130 and 100; any other value (including an
# unset TUNE_ARCH) leaves TUNE_SM_TARGETS undefined.
# Every conditional is closed with its own `endif`, and the selector is
# stripped so stray whitespace in the supplied value does not defeat the match.
ifeq ($(strip $(TUNE_ARCH)),300)
TUNE_SM_TARGETS = $(GEN_SM30)
endif

ifeq ($(strip $(TUNE_ARCH)),200)
TUNE_SM_TARGETS = $(GEN_SM20)
endif

ifeq ($(strip $(TUNE_ARCH)),130)
TUNE_SM_TARGETS = $(GEN_SM13)
endif

ifeq ($(strip $(TUNE_ARCH)),100)
TUNE_SM_TARGETS = $(GEN_SM10)
endif
76 #-------------------------------------------------------------------------------
78 #-------------------------------------------------------------------------------
# Tuning problem-size selector, copied from the lower-case `tunesize`
# variable (presumably supplied on the make command line).
TUNE_SIZE = ${tunesize}
87 #-------------------------------------------------------------------------------
89 #-------------------------------------------------------------------------------
92 #-------------------------------------------------------------------------------
94 #-------------------------------------------------------------------------------
# CUDA header directory: the `include` directory that sits next to the
# directory containing nvcc.
# `:=` runs `dirname` once at parse time instead of on every expansion of
# $(CUDA_INC).  The double quotes are part of the value so the path stays a
# single shell word.
CUDA_INC := "$(shell dirname $(NVCC))/../include"
# Include search path handed to nvcc: the CUDA headers, then the current
# directory.
INC = -I${CUDA_INC} -I.
99 #-------------------------------------------------------------------------------
101 #-------------------------------------------------------------------------------
105 #-------------------------------------------------------------------------------
107 #-------------------------------------------------------------------------------
ifneq ($(force64), 1)
  # Compile with 32-bit device pointers by default
  # NOTE(review): no assignment is visible inside this conditional, yet the
  # rules in this file expand $(ARCH) and $(ARCH_SUFFIX) -- confirm where
  # those two variables are set for the 32-bit and 64-bit cases.
endif
# Base nvcc flags.  `\#` is make's escape for a literal '#', so it does NOT
# start a comment: nvcc receives `-Xcudafe -#` followed by `-cuda` and
# `--ptxas-options=-v`.
# NOTE(review): confirm that `-cuda --ptxas-options=-v` is meant to be live
# rather than commented out.
NVCCFLAGS = -Xptxas -v -Xcudafe -\# -cuda --ptxas-options=-v

# Windows hosts: forward /bigobj and /Zm500 to the host compiler.
ifeq (WIN_NT, $(findstring WIN_NT, $(OSUPPER)))
NVCCFLAGS += -Xcompiler /bigobj -Xcompiler /Zm500
endif

# Applies when the detected nvcc version string does not contain "3.0".
ifeq (,$(findstring 3.0, $(NVCC_VERSION)))
# Disable the ABI by default for 3.1+
NVCCFLAGS += -Xptxas -abi=no
endif

# Cap per-thread registers only when a limit was supplied.  Appending a bare
# `-maxrregcount` with an empty value would leave the option without its
# argument on the nvcc command line.
ifdef maxregisters
NVCCFLAGS += -maxrregcount $(maxregisters)
endif

ifeq (4.1, $(findstring 4.1, $(NVCC_VERSION)))
# Disable nvvm for 4.1
# NOTE(review): the flag that implements this is not visible here -- confirm.
endif
149 #-------------------------------------------------------------------------------
151 #-------------------------------------------------------------------------------
# b40c header files that the build rules depend on.
# NOTE(review): the head of this assignment is not visible in this view.  The
# list is bound to DEPS because every rule in this file names $(DEPS) as a
# prerequisite -- confirm the variable name and any entries that preceded the
# first wildcard.
# GNU make's $(wildcard) has no recursive `**`; it behaves like `*`, so each
# `**/*.cuh` pattern matches headers exactly one directory level down.
# `:=` expands the globs once at parse time.
DEPS := \
    $(wildcard b40c/util/*.cuh) \
    $(wildcard b40c/util/**/*.cuh) \
    $(wildcard b40c/scan/*.cuh) \
    $(wildcard b40c/scan/**/*.cuh) \
    $(wildcard b40c/partition/*.cuh) \
    $(wildcard b40c/partition/**/*.cuh) \
    $(wildcard b40c/radix_sort/*.cuh) \
    $(wildcard b40c/radix_sort/**/*.cuh)
163 #-------------------------------------------------------------------------------
164 # g++ and its options
165 #-------------------------------------------------------------------------------
# Linker options for the CUDA driver and runtime libraries, searched for in
# the fixed directory /usr/local/cuda/lib64/.
CUDALIBFLAG = -L/usr/local/cuda/lib64/ \
              -lcuda \
              -lcudart
# Host compiler flags used by every $(CC) rule in this file.  This plain `=`
# assignment replaces the CFLAGS value set near the top of the file.
# Note that -lm (a link library) travels with the compile flags here.
CFLAGS = -O3 \
         -Wall \
         -funroll-loops \
         -fprefetch-loop-arrays \
         -fopenmp \
         -std=c++0x \
         -lm
ifneq ($(disablempopcnt), 1)
  # NOTE(review): the body of this conditional is not visible in this view;
  # restore the statements that belong here.  The `endif` below only closes
  # the block so the rest of the file parses.
endif
177 #-------------------------------------------------------------------------------
179 #-------------------------------------------------------------------------------
# Default goal.  `all` names no file, so it is declared phony: a stray file
# called `all` can then never make the build look up to date.
# NOTE(review): `all` is declared twice.  As written, make merges the two
# prerequisite lists (GPU builder + CPU builder + AsiicBWT2BWT).  The second
# line looks like the CPU-only alternative of a conditional that is not
# visible here -- confirm.
.PHONY: all
all: 2bwt_builder_gpu 2bwt_builder_cpu AsiicBWT2BWT
#2BWT-Builder 2BWT-Interface.o Sample overlap-sample

all: 2bwt_builder_cpu AsiicBWT2BWT
# 2BWT-Builder 2BWT-Interface.o Sample overlap-sample
190 #-------------------------------------------------------------------------------
192 #-------------------------------------------------------------------------------
# Generic object rule: foo.o is compiled from foo.cpp and is also rebuilt when
# the same-named header foo.h or any file listed in $(DEPS) changes.
%.o : %.cpp %.h $(DEPS)
	$(CC) $(CFLAGS) -o $@ -c $<
# CPU-only builder: readbwt.cpp compiled and linked in one step with
# DISABLE_GPU defined.  $< is readbwt.cpp (the first prerequisite) and $@ is
# the executable name.
2bwt_builder_cpu : readbwt.cpp lv2_cpu_sort.h $(DEPS)
	$(CC) $(CFLAGS) -D DISABLE_GPU $< $(ZLIB) -o $@
# Stage 1 of the GPU sort build: run nvcc over lv2_gpu_sort.cu and write the
# result to a hidden file whose name carries the nvcc version and the
# architecture suffix.  $< is lv2_gpu_sort.cu; $@ is that hidden .cpp file.
.lv2_gpu_sort_$(NVCC_VERSION)_$(ARCH_SUFFIX).cpp : lv2_gpu_sort.cu lv2_gpu_sort.h $(DEPS)
	$(NVCC) $(SM_TARGETS) $< $(NVCCFLAGS) $(ARCH) $(INC) $(LIBS) -O3 -c -o $@
# Stage 2 of the GPU sort build: compile the hidden .cpp file produced by the
# nvcc stage into an object file with the host compiler.
.lv2_gpu_sort_$(NVCC_VERSION)_$(ARCH_SUFFIX).o : .lv2_gpu_sort_$(NVCC_VERSION)_$(ARCH_SUFFIX).cpp $(DEPS)
	$(CC) $(CFLAGS) -c $< -o $@
# GPU-enabled builder: readbwt.cpp linked with the GPU sort object.
# $(CUDALIBFLAG) is placed AFTER the source and object operands.  The linker
# resolves libraries in command-line order, so with --as-needed (the default
# on several distributions) -lcuda/-lcudart listed before the objects that
# reference them are discarded and the link fails with undefined symbols.
2bwt_builder_gpu: readbwt.cpp lv2_cpu_sort.h .lv2_gpu_sort_$(NVCC_VERSION)_$(ARCH_SUFFIX).o $(DEPS)
	$(CC) $(CFLAGS) readbwt.cpp .lv2_gpu_sort_$(NVCC_VERSION)_$(ARCH_SUFFIX).o $(CUDALIBFLAG) $(ZLIB) -o $@
# Stand-alone converter built directly from its single C source file.
# $< is AsiicBWT2BWT.c and $@ is the executable.
AsiicBWT2BWT : AsiicBWT2BWT.c
	$(CC) $(CFLAGS) $< -o $@ $(LIBS)
# Remove build products and nvcc intermediates.
# NOTE(review): the rule header for this recipe is not visible in this view;
# `clean` is assumed -- confirm.
# The generated .lv2_gpu_sort_* files are listed explicitly because shell
# globs such as `*.o` never match names that begin with a dot.
# `clean` names no file, so it is declared phony.
.PHONY: clean
clean:
	rm -f *.i* *.cubin *.cu.c *.cudafe* *.fatbin.c *.ptx *.hash *.cu.cpp *.o \
	      .lv2_gpu_sort_*.cpp .lv2_gpu_sort_*.o \
	      2bwt_builder_*pu AsiicBWT2BWT