cuda package from Carlos
This commit is contained in:
parent
fcaabd2c0b
commit
726d7ca1cc
11
Makefile.in
11
Makefile.in
@ -1,4 +1,4 @@
|
||||
#
|
||||
|
||||
# default base directory for YAP installation
|
||||
# (EROOT for architecture-dependent files)
|
||||
#
|
||||
@ -702,6 +702,7 @@ all: startup.yss
|
||||
@INSTALL_DLLS@ (cd library/random; $(MAKE))
|
||||
@INSTALL_DLLS@ (cd library/regex; $(MAKE))
|
||||
@INSTALL_DLLS@ (cd library/rltree; $(MAKE))
|
||||
@INSTALL_DLLS@ (cd packages/yap-lbfgs; $(MAKE))
|
||||
@ENABLE_WINCONSOLE@ (cd swi/console; $(MAKE))
|
||||
@INSTALL_DLLS@ (cd library/system; $(MAKE))
|
||||
@INSTALL_DLLS@ (cd library/tries; $(MAKE))
|
||||
@ -729,6 +730,7 @@ all: startup.yss
|
||||
@ENABLE_CUDD@ (cd packages/ProbLog/simplecudd_lfi; $(MAKE))
|
||||
@ENABLE_JPL@ @INSTALL_DLLS@ (cd packages/jpl; $(MAKE))
|
||||
@ENABLE_PYTHON@ @INSTALL_DLLS@ (cd packages/python; $(MAKE))
|
||||
@ENABLE_CUDA@ @INSTALL_DLLS@ (cd packages/cuda; $(MAKE))
|
||||
|
||||
startup.yss: yap@EXEC_SUFFIX@ $(PL_SOURCES)
|
||||
-rm -f startup.yss
|
||||
@ -775,6 +777,7 @@ install_unix: startup.yss libYap.a
|
||||
@INSTALL_DLLS@ (cd library/random; $(MAKE) install)
|
||||
@INSTALL_DLLS@ (cd library/regex; $(MAKE) install)
|
||||
@INSTALL_DLLS@ (cd library/rltree; $(MAKE) install)
|
||||
@INSTALL_DLLS@ (cd packages/yap-lbfgs; $(MAKE) install)
|
||||
@INSTALL_DLLS@ (cd library/system; $(MAKE) install)
|
||||
@INSTALL_DLLS@ (cd library/tries; $(MAKE) install)
|
||||
@ENABLE_GECODE@ @INSTALL_DLLS@ (cd library/gecode; $(MAKE) install)
|
||||
@ -799,6 +802,7 @@ install_unix: startup.yss libYap.a
|
||||
@ENABLE_REAL@ @INSTALL_DLLS@ (cd packages/real; $(MAKE) install)
|
||||
@ENABLE_JPL@ @INSTALL_DLLS@ (cd packages/jpl; $(MAKE) install)
|
||||
@ENABLE_PYTHON@ @INSTALL_DLLS@ (cd packages/python; $(MAKE) install)
|
||||
@ENABLE_CUDA@ @INSTALL_DLLS@ (cd packages/cuda; $(MAKE))
|
||||
mkdir -p $(DESTDIR)$(INCLUDEDIR)
|
||||
mkdir -p $(DESTDIR)$(INCLUDEDIR)/src
|
||||
$(INSTALL) $(HEADERS) $(DESTDIR)$(INCLUDEDIR)/src
|
||||
@ -813,6 +817,7 @@ install_unix: startup.yss libYap.a
|
||||
@ENABLE_BDDLIB@ @INSTALL_DLLS@ (cd packages/bdd; $(MAKE) install)
|
||||
@ENABLE_CUDD@ (cd packages/ProbLog/simplecudd; $(MAKE) install)
|
||||
@ENABLE_CUDD@ (cd packages/ProbLog/simplecudd_lfi; $(MAKE) install)
|
||||
@ENABLE_CUDA@ @INSTALL_DLLS@ (cd packages/cuda; $(MAKE) install)
|
||||
|
||||
|
||||
install_win32: startup.yss @ENABLE_WINCONSOLE@ yap-win@EXEC_SUFFIX@
|
||||
@ -843,6 +848,7 @@ install_win32: startup.yss @ENABLE_WINCONSOLE@ yap-win@EXEC_SUFFIX@
|
||||
(cd library/regex; $(MAKE) install)
|
||||
(cd library/rltree; $(MAKE) install)
|
||||
(cd library/system; $(MAKE) install)
|
||||
@INSTALL_DLLS@ (cd packages/yap-lbfgs; $(MAKE) install)
|
||||
@ENABLE_WINCONSOLE@ (cd swi/console; $(MAKE) install)
|
||||
@INSTALL_MATLAB@ (cd library/matlab; $(MAKE) install)
|
||||
@ENABLE_REAL@ (cd packages/real; $(MAKE) install)
|
||||
@ -871,6 +877,7 @@ install_win32: startup.yss @ENABLE_WINCONSOLE@ yap-win@EXEC_SUFFIX@
|
||||
@ENABLE_BDDLIB@ (cd packages/bdd; $(MAKE) install)
|
||||
@ENABLE_CUDD@ (cd packages/ProbLog/simplecudd; $(MAKE) install)
|
||||
@ENABLE_CUDD@ (cd packages/ProbLog/simplecudd_lfi; $(MAKE) install)
|
||||
@ENABLE_CUDA@ @INSTALL_DLLS@ (cd packages/cuda; $(MAKE) install)
|
||||
|
||||
install_library: @YAPLIB@
|
||||
mkdir -p $(DESTDIR)$(INCLUDEDIR)
|
||||
@ -914,6 +921,7 @@ clean: clean_docs
|
||||
@INSTALL_DLLS@ (cd library/random; $(MAKE) clean)
|
||||
@INSTALL_DLLS@ (cd library/regex; $(MAKE) clean)
|
||||
@INSTALL_DLLS@ (cd library/rltree; $(MAKE) clean)
|
||||
@INSTALL_DLLS@ (cd packages/yap-lbfgs; $(MAKE) clean)
|
||||
@ENABLE_WINCONSOLE@ (cd swi/console; $(MAKE) clean)
|
||||
@INSTALL_DLLS@ (cd library/system; $(MAKE) clean)
|
||||
@INSTALL_DLLS@ (cd library/tries; $(MAKE) clean)
|
||||
@ -941,6 +949,7 @@ clean: clean_docs
|
||||
@ENABLE_CUDD@ (cd packages/ProbLog/simplecudd_lfi; $(MAKE) clean)
|
||||
@ENABLE_JPL@ @INSTALL_DLLS@ (cd packages/jpl; $(MAKE) clean)
|
||||
@ENABLE_PYTHON@ @INSTALL_DLLS@ (cd packages/python; $(MAKE) clean)
|
||||
@ENABLE_CUDA@ @INSTALL_DLLS@ (cd packages/cuda; $(MAKE) clean)
|
||||
|
||||
|
||||
|
||||
|
181
configure
vendored
181
configure
vendored
@ -705,6 +705,8 @@ ENABLE_CLIB
|
||||
ENABLE_CHR
|
||||
CUDA_LDFLAGS
|
||||
CUDA_CPPFLAGS
|
||||
CUDA_SHLIB_LD
|
||||
ENABLE_CUDA
|
||||
NO_BUILTIN_REGEXP
|
||||
YAP_EXTRAS
|
||||
SONAMEFLAG
|
||||
@ -735,12 +737,12 @@ CPLINT_CFLAGS
|
||||
CPLINT_LIBS
|
||||
ENABLE_PRISM
|
||||
ENABLE_GECODE
|
||||
NVCC
|
||||
PYTHON
|
||||
REXE
|
||||
INSTALL_INFO
|
||||
MPI_CC
|
||||
AR
|
||||
INDENT
|
||||
RANLIB
|
||||
INSTALL_DATA
|
||||
INSTALL_SCRIPT
|
||||
@ -855,6 +857,7 @@ with_R
|
||||
with_python
|
||||
with_judy
|
||||
with_minisat
|
||||
with_cuda
|
||||
with_cudd
|
||||
enable_myddas
|
||||
enable_myddas_stats
|
||||
@ -1538,6 +1541,7 @@ Optional Packages:
|
||||
--with-python=DIR interface to R language
|
||||
--with-judy=DIR UDI needs judy library
|
||||
--enable-minisat use minisat interface
|
||||
--enable-cuda use minisat interface
|
||||
--with-cudd=DIR use CUDD package in DIR
|
||||
--with-java=JAVA_HOME use Java instalation in JAVA_HOME
|
||||
--with-readline=DIR use GNU Readline Library in DIR
|
||||
@ -4755,6 +4759,21 @@ fi
|
||||
|
||||
|
||||
|
||||
# Check whether --with-cuda was given.
|
||||
if test "${with_cuda+set}" = set; then :
|
||||
withval=$with_cuda; if test "$withval" = yes; then
|
||||
yap_cv_cuda=/usr
|
||||
elif test "$withval" = no; then
|
||||
yap_cv_cuda=no
|
||||
else
|
||||
yap_cv_cuda="$withval"
|
||||
fi
|
||||
else
|
||||
yap_cv_cuda=no
|
||||
fi
|
||||
|
||||
|
||||
|
||||
# Check whether --with-cudd was given.
|
||||
if test "${with_cudd+set}" = set; then :
|
||||
withval=$with_cudd; yap_cv_cudd="$withval"
|
||||
@ -5151,9 +5170,6 @@ else
|
||||
ENABLE_CUDD=""
|
||||
fi
|
||||
|
||||
CUDA_LDFLAGS=""
|
||||
CUDA_CPPFLAGS=""
|
||||
|
||||
|
||||
|
||||
if test "$use_condor" = yes
|
||||
@ -5459,98 +5475,6 @@ else
|
||||
RANLIB="$ac_cv_prog_RANLIB"
|
||||
fi
|
||||
|
||||
if test -n "$ac_tool_prefix"; then
|
||||
# Extract the first word of "${ac_tool_prefix}indent", so it can be a program name with args.
|
||||
set dummy ${ac_tool_prefix}indent; ac_word=$2
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
|
||||
$as_echo_n "checking for $ac_word... " >&6; }
|
||||
if ${ac_cv_prog_INDENT+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
if test -n "$INDENT"; then
|
||||
ac_cv_prog_INDENT="$INDENT" # Let the user override the test.
|
||||
else
|
||||
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
|
||||
for as_dir in $PATH
|
||||
do
|
||||
IFS=$as_save_IFS
|
||||
test -z "$as_dir" && as_dir=.
|
||||
for ac_exec_ext in '' $ac_executable_extensions; do
|
||||
if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
|
||||
ac_cv_prog_INDENT="${ac_tool_prefix}indent"
|
||||
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
|
||||
break 2
|
||||
fi
|
||||
done
|
||||
done
|
||||
IFS=$as_save_IFS
|
||||
|
||||
fi
|
||||
fi
|
||||
INDENT=$ac_cv_prog_INDENT
|
||||
if test -n "$INDENT"; then
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $INDENT" >&5
|
||||
$as_echo "$INDENT" >&6; }
|
||||
else
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
|
||||
$as_echo "no" >&6; }
|
||||
fi
|
||||
|
||||
|
||||
fi
|
||||
if test -z "$ac_cv_prog_INDENT"; then
|
||||
ac_ct_INDENT=$INDENT
|
||||
# Extract the first word of "indent", so it can be a program name with args.
|
||||
set dummy indent; ac_word=$2
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
|
||||
$as_echo_n "checking for $ac_word... " >&6; }
|
||||
if ${ac_cv_prog_ac_ct_INDENT+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
if test -n "$ac_ct_INDENT"; then
|
||||
ac_cv_prog_ac_ct_INDENT="$ac_ct_INDENT" # Let the user override the test.
|
||||
else
|
||||
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
|
||||
for as_dir in $PATH
|
||||
do
|
||||
IFS=$as_save_IFS
|
||||
test -z "$as_dir" && as_dir=.
|
||||
for ac_exec_ext in '' $ac_executable_extensions; do
|
||||
if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
|
||||
ac_cv_prog_ac_ct_INDENT="indent"
|
||||
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
|
||||
break 2
|
||||
fi
|
||||
done
|
||||
done
|
||||
IFS=$as_save_IFS
|
||||
|
||||
fi
|
||||
fi
|
||||
ac_ct_INDENT=$ac_cv_prog_ac_ct_INDENT
|
||||
if test -n "$ac_ct_INDENT"; then
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_INDENT" >&5
|
||||
$as_echo "$ac_ct_INDENT" >&6; }
|
||||
else
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
|
||||
$as_echo "no" >&6; }
|
||||
fi
|
||||
|
||||
if test "x$ac_ct_INDENT" = x; then
|
||||
INDENT=":"
|
||||
else
|
||||
case $cross_compiling:$ac_tool_warned in
|
||||
yes:)
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
|
||||
$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
|
||||
ac_tool_warned=yes ;;
|
||||
esac
|
||||
INDENT=$ac_ct_INDENT
|
||||
fi
|
||||
else
|
||||
INDENT="$ac_cv_prog_INDENT"
|
||||
fi
|
||||
|
||||
if test -n "$ac_tool_prefix"; then
|
||||
# Extract the first word of "${ac_tool_prefix}ar", so it can be a program name with args.
|
||||
set dummy ${ac_tool_prefix}ar; ac_word=$2
|
||||
@ -6994,6 +6918,68 @@ fi
|
||||
|
||||
fi
|
||||
|
||||
CUDA_LDFLAGS=""
|
||||
CUDA_CPPFLAGS=""
|
||||
if test "$yap_cv_cuda" = no
|
||||
then
|
||||
ENABLE_CUDA="@# "
|
||||
else
|
||||
# Extract the first word of "nvcc", so it can be a program name with args.
|
||||
set dummy nvcc; ac_word=$2
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
|
||||
$as_echo_n "checking for $ac_word... " >&6; }
|
||||
if ${ac_cv_path_NVCC+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
case $NVCC in
|
||||
[\\/]* | ?:[\\/]*)
|
||||
ac_cv_path_NVCC="$NVCC" # Let the user override the test with a path.
|
||||
;;
|
||||
*)
|
||||
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
|
||||
for as_dir in $yap_cv_cuda/bin
|
||||
do
|
||||
IFS=$as_save_IFS
|
||||
test -z "$as_dir" && as_dir=.
|
||||
for ac_exec_ext in '' $ac_executable_extensions; do
|
||||
if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
|
||||
ac_cv_path_NVCC="$as_dir/$ac_word$ac_exec_ext"
|
||||
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
|
||||
break 2
|
||||
fi
|
||||
done
|
||||
done
|
||||
IFS=$as_save_IFS
|
||||
|
||||
test -z "$ac_cv_path_NVCC" && ac_cv_path_NVCC="no"
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
NVCC=$ac_cv_path_NVCC
|
||||
if test -n "$NVCC"; then
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $NVCC" >&5
|
||||
$as_echo "$NVCC" >&6; }
|
||||
else
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
|
||||
$as_echo "no" >&6; }
|
||||
fi
|
||||
|
||||
|
||||
if test "$yap_cv_cuda" = no
|
||||
then
|
||||
ENABLE_CUDA="@# "
|
||||
else
|
||||
ENABLE_CUDA="@# "
|
||||
case "$target_os" in
|
||||
*darwin*)
|
||||
CUDA_LDFLAGS="$LDFLAGS"
|
||||
CUDA_CPPFLAGS="-shared -arch=sm_20 -Xcompiler -fPIC -O3 "
|
||||
CUDA_SHLIB_LD="$NVCC -Xcompiler -dynamiclib"
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
if test "$yap_cv_myddas" != "no"
|
||||
then
|
||||
@ -9819,6 +9805,9 @@ CMDEXT=sh
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
40
configure.in
40
configure.in
@ -279,6 +279,17 @@ AC_ARG_WITH(minisat,
|
||||
fi,
|
||||
[yap_cv_minisat=yes])
|
||||
|
||||
AC_ARG_WITH(cuda,
|
||||
[ --enable-cuda use minisat interface],
|
||||
if test "$withval" = yes; then
|
||||
yap_cv_cuda=/usr
|
||||
elif test "$withval" = no; then
|
||||
yap_cv_cuda=no
|
||||
else
|
||||
yap_cv_cuda="$withval"
|
||||
fi,
|
||||
[yap_cv_cuda=no])
|
||||
|
||||
AC_ARG_WITH(cudd,
|
||||
[ --with-cudd[=DIR] use CUDD package in DIR],
|
||||
yap_cv_cudd="$withval",
|
||||
@ -567,9 +578,6 @@ else
|
||||
ENABLE_CUDD=""
|
||||
fi
|
||||
|
||||
CUDA_LDFLAGS=""
|
||||
CUDA_CPPFLAGS=""
|
||||
|
||||
|
||||
|
||||
dnl condor universe does not like dynamic linking on Linux, DEC, and HP-UX platforms.
|
||||
@ -688,7 +696,6 @@ dnl Checks for programs.
|
||||
AC_PROG_LN_S
|
||||
AC_PROG_INSTALL
|
||||
AC_PROG_RANLIB
|
||||
AC_CHECK_TOOL(INDENT,indent,:)
|
||||
AC_CHECK_TOOL(AR,ar,:)
|
||||
AC_CHECK_TOOL(MPI_CC,mpicc,${CC})
|
||||
AC_PATH_PROG(INSTALL_INFO,install-info,true,$PATH:/sbin:/usr/sbin:/usr/etc:/usr/local/sbin)
|
||||
@ -892,6 +899,28 @@ if test "$yap_cv_judy" != "no"; then
|
||||
AC_CHECK_LIB(Judy, Judy1Set,,[AC_MSG_RESULT([libJudy not found, UDI will only work with one Index at a time])])
|
||||
fi
|
||||
|
||||
CUDA_LDFLAGS=""
|
||||
CUDA_CPPFLAGS=""
|
||||
if test "$yap_cv_cuda" = no
|
||||
then
|
||||
ENABLE_CUDA="@# "
|
||||
else
|
||||
AC_PATH_PROG(NVCC, [nvcc], [no], [$yap_cv_cuda/bin])
|
||||
if test "$yap_cv_cuda" = no
|
||||
then
|
||||
ENABLE_CUDA="@# "
|
||||
else
|
||||
ENABLE_CUDA="@# "
|
||||
case "$target_os" in
|
||||
*darwin*)
|
||||
CUDA_LDFLAGS="$LDFLAGS"
|
||||
CUDA_CPPFLAGS="-shared -arch=sm_20 -Xcompiler -fPIC -O3 "
|
||||
CUDA_SHLIB_LD="$NVCC -Xcompiler -dynamiclib"
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
fi
|
||||
|
||||
dnl if test "$yap_cv_cudd" != "no"
|
||||
dnl then
|
||||
dnl AC_CHECK_LIB(cudd,Cudd_Init)
|
||||
@ -1844,6 +1873,9 @@ AC_SUBST(INSTALL_INFO)
|
||||
dnl let YAP_EXTRAS fall through configure, from the env into Makefile
|
||||
AC_SUBST(YAP_EXTRAS)
|
||||
AC_SUBST(NO_BUILTIN_REGEXP)
|
||||
AC_SUBST(ENABLE_CUDA)
|
||||
AC_SUBST(NVCC)
|
||||
AC_SUBST(CUDA_SHLIB_LD)
|
||||
AC_SUBST(CUDA_CPPFLAGS)
|
||||
AC_SUBST(CUDA_LDFLAGS)
|
||||
AC_SUBST(ENABLE_CHR)
|
||||
|
@ -20,7 +20,9 @@ YAPLIBDIR=@libdir@/Yap
|
||||
#
|
||||
#
|
||||
CC=@CC@
|
||||
NVCC=@NVCC@
|
||||
CFLAGS= @SHLIB_CFLAGS@ $(YAP_EXTRAS) $(DEFS) -I$(srcdir) -I../.. -I$(srcdir)/../../include @CUDA_CPPFLAGS@
|
||||
NVCCFLAGS=@CUDA_CPPFLAGS@ -I$(srcdir) -I../.. -I$(srcdir)/../../include
|
||||
LDFLAGS=@LDFLAGS@
|
||||
#
|
||||
#
|
||||
@ -40,7 +42,7 @@ CWD=$(PWD)
|
||||
BDD_PROLOG= \
|
||||
$(srcdir)/cuda.yap
|
||||
|
||||
OBJS=cuda.o
|
||||
OBJS=cuda.o memory.o lista.o
|
||||
SOBJS=cuda.@SO@
|
||||
|
||||
#in some systems we just create a single object, in others we need to
|
||||
@ -48,14 +50,17 @@ SOBJS=cuda.@SO@
|
||||
|
||||
all: $(SOBJS)
|
||||
|
||||
cuda.o: $(srcdir)/cuda.c
|
||||
$(CC) -c $(CFLAGS) $(srcdir)/cuda.c -o cuda.o
|
||||
cuda.o: $(srcdir)/cuda.c $(srcdir)/pred.h
|
||||
$(NVCC) -c $(NVCCFLAGS) $(srcdir)/cuda.c -o cuda.o
|
||||
|
||||
@DO_SECOND_LD@%.@SO@: %.o
|
||||
@DO_SECOND_LD@ @SHLIB_LD@ $(LDFLAGS) -o $@ $< @EXTRA_LIBS_FOR_DLLS@ @CUDA_LDFLAGS@
|
||||
lista.o: $(srcdir)/lista.cu $(srcdir)/pred.h
|
||||
$(NVCC) -c $(NVCCFLAGS) $(srcdir)/lista.cu -o lista.o
|
||||
|
||||
@DO_SECOND_LD@cuda.@SO@: cuda.o
|
||||
@DO_SECOND_LD@ @SHLIB_LD@ $(LDFLAGS) -o cuda.@SO@ cuda.o @EXTRA_LIBS_FOR_DLLS@ @CUDA_LDFLAGS@
|
||||
memory.o: $(srcdir)/memory.cu $(srcdir)/pred.h
|
||||
$(NVCC) -c $(NVCCFLAGS) $(srcdir)/memory.cu -o memory.o
|
||||
|
||||
@DO_SECOND_LD@cuda.@SO@: $(OBJS)
|
||||
@DO_SECOND_LD@ @CUDA_SHLIB_LD@ $(CUDA_LDFLAGS) -o cuda.@SO@ $(OBJS) -L../.. -lYAP
|
||||
|
||||
install: all
|
||||
mkdir -p $(DESTDIR)$(SHAREDIR)
|
||||
|
131
packages/cuda/bpreds.cu
Normal file
131
packages/cuda/bpreds.cu
Normal file
@ -0,0 +1,131 @@
|
||||
#define SBG_EQ (-1)
|
||||
#define SBG_GT (-2)
|
||||
#define SBG_LT (-3)
|
||||
#define SBG_GE (-4)
|
||||
#define SBG_LE (-5)
|
||||
#define SBG_DF (-6)
|
||||
|
||||
__global__ void predicates(int *dop1, int rows, int cols, int *cons, int numc, int *res)
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int x, rowact, op1, op2;
|
||||
if(threadIdx.x < numc)
|
||||
shared[threadIdx.x] = cons[threadIdx.x];
|
||||
__syncthreads();
|
||||
if(id < rows)
|
||||
{
|
||||
rowact = id * cols;
|
||||
for(x = 0; x < numc; x += 3)
|
||||
{
|
||||
op1 = shared[x+1];
|
||||
if(op1 < 0)
|
||||
op1 *= -1;
|
||||
else
|
||||
op1 = dop1[rowact + op1];
|
||||
op2 = shared[x+2];
|
||||
if(op2 < 0)
|
||||
op2 *= -1;
|
||||
else
|
||||
op2 = dop1[rowact + op2];
|
||||
switch(shared[x])
|
||||
{
|
||||
case SBG_EQ: if(op1 != op2)
|
||||
return;
|
||||
case SBG_GT: if(op1 <= op2)
|
||||
return;
|
||||
case SBG_LT: if(op1 >= op2)
|
||||
return;
|
||||
case SBG_GE: if(op1 < op2)
|
||||
return;
|
||||
case SBG_LE: if(op1 > op2)
|
||||
return;
|
||||
case SBG_DF: if(op1 == op2)
|
||||
return;
|
||||
}
|
||||
}
|
||||
res[id] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
int bpreds(int *dop1, int rows, int *bin, int3 numpreds, int **ret)
|
||||
{
|
||||
int *temp;
|
||||
int tmplen = rows + 1;
|
||||
int size = tmplen * sizeof(int);
|
||||
reservar(&temp, size);
|
||||
cudaMemset(temp, 0, size);
|
||||
|
||||
int *dhead;
|
||||
int predn = numpreds.x * 3;
|
||||
int spredn = predn * sizeof(int);
|
||||
int sproj = numpreds.z * sizeof(int);
|
||||
int hsize;
|
||||
if(predn > numpreds.z)
|
||||
hsize = spredn;
|
||||
else
|
||||
hsize = sproj;
|
||||
reservar(&dhead, hsize);
|
||||
cudaMemcpy(dhead, bin, spredn, cudaMemcpyHostToDevice);
|
||||
|
||||
int blockllen = rows / 1024 + 1;
|
||||
int numthreads = 1024;
|
||||
|
||||
/*int x;
|
||||
cout << "arraypreds = ";
|
||||
for(x = 0; x < predn; x++)
|
||||
cout << bin[x] << " ";
|
||||
cout << endl;
|
||||
cout << "temptable = ";
|
||||
for(x = 0; x < numpreds.z; x++)
|
||||
cout << bin[x+predn] << " ";
|
||||
cout << endl;
|
||||
int y;
|
||||
int *hop1 = (int *)malloc(numpreds.y * rows * sizeof(int));
|
||||
cudaMemcpy(hop1, dop1, numpreds.y * rows * sizeof(int), cudaMemcpyDeviceToHost);
|
||||
for(x = 0; x < rows; x++)
|
||||
{
|
||||
for(y = 0; y < numpreds.y; y++)
|
||||
cout << hop1[x * numpreds.y + y] << " ";
|
||||
cout << endl;
|
||||
}
|
||||
free(hop1);*/
|
||||
|
||||
predicates<<<blockllen, numthreads, spredn>>>(dop1, rows, numpreds.y, dhead, predn, temp + 1);
|
||||
|
||||
/*int y;
|
||||
int *hop1 = (int *)malloc((rows + 1) * sizeof(int));
|
||||
cudaMemcpy(hop1, temp, (rows + 1) * sizeof(int), cudaMemcpyDeviceToHost);
|
||||
for(x = 0; x < (rows + 1); x++)
|
||||
cout << hop1[x] << " ";
|
||||
cout << endl;
|
||||
free(hop1);*/
|
||||
|
||||
thrust::device_ptr<int> res;
|
||||
res = thrust::device_pointer_cast(temp);
|
||||
thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
|
||||
int num = res[rows];
|
||||
if(num == 0)
|
||||
return 0;
|
||||
|
||||
int *fres;
|
||||
reservar(&fres, num * sproj);
|
||||
cudaMemcpy(dhead, bin + predn, sproj, cudaMemcpyHostToDevice);
|
||||
llenarproyectar<<<blockllen, numthreads, sproj>>>(dop1, rows, numpreds.y, temp, dhead, numpreds.z, fres);
|
||||
|
||||
/*int y;
|
||||
int *hop1 = (int *)malloc(numpreds.z * num * sizeof(int));
|
||||
cudaMemcpy(hop1, fres, numpreds.z * num * sizeof(int), cudaMemcpyDeviceToHost);
|
||||
for(x = 0; x < num; x++)
|
||||
{
|
||||
for(y = 0; y < numpreds.z; y++)
|
||||
cout << hop1[x * numpreds.z + y] << " ";
|
||||
cout << endl;
|
||||
}
|
||||
free(hop1);*/
|
||||
|
||||
liberar(dhead, hsize);
|
||||
liberar(temp, size);
|
||||
*ret = fres;
|
||||
return num;
|
||||
}
|
@ -4,38 +4,36 @@
|
||||
#include "YapInterface.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include "pred.h"
|
||||
|
||||
typedef struct predicate_struct {
|
||||
int name;
|
||||
int num_rows;
|
||||
int num_columns;
|
||||
int is_fact;
|
||||
int *address_host_table;
|
||||
} predicate;
|
||||
predicate *facts[100]; /*Temporary solution to maintain facts and rules*/
|
||||
predicate *rules[100];
|
||||
int32_t cf = 0, cr = 0;
|
||||
|
||||
// initialize CUDA system
|
||||
void Cuda_Initialize( void );
|
||||
|
||||
// add/replace a set of facts for predicate pred
|
||||
int Cuda_NewFacts(predicate *pred);
|
||||
int32_t Cuda_NewFacts(predicate *pred);
|
||||
|
||||
// add/replace a rule for predicate pred
|
||||
int Cuda_NewRule(predicate *pred);
|
||||
int32_t Cuda_NewRule(predicate *pred);
|
||||
|
||||
// erase predicate pred
|
||||
int Cuda_Erase(predicate *pred);
|
||||
int32_t Cuda_Erase(predicate *pred);
|
||||
|
||||
// evaluate predicate pred, mat is bound to a vector of solutions, and
|
||||
// output the count
|
||||
int Cuda_Eval(predicate *pred, int **mat);
|
||||
//int32_t Cuda_Eval(predicate *pred, int32_t **mat); This functions arguments were changed, please see pred.h
|
||||
|
||||
void init_cuda( void );
|
||||
|
||||
static void
|
||||
dump_mat(int mat[], int nrows, int ncols)
|
||||
dump_mat(int32_t mat[], int32_t nrows, int32_t ncols)
|
||||
{
|
||||
int i, j;
|
||||
int32_t i, j;
|
||||
for ( i=0; i< nrows; i++) {
|
||||
printf("%d", mat[i*ncols]);
|
||||
for (j=1; j < ncols; j++) {
|
||||
@ -46,10 +44,10 @@ dump_mat(int mat[], int nrows, int ncols)
|
||||
}
|
||||
|
||||
static void
|
||||
dump_vec(int vec[], int rows)
|
||||
dump_vec(int32_t vec[], int32_t rows)
|
||||
{
|
||||
int i = 1;
|
||||
int j = 0;
|
||||
int32_t i = 1;
|
||||
int32_t j = 0;
|
||||
printf("%d", vec[0]);
|
||||
for (j = 0; j < rows; j++) {
|
||||
for ( ; vec[i]; i++ ) {
|
||||
@ -67,19 +65,23 @@ void Cuda_Initialize( void )
|
||||
{
|
||||
}
|
||||
|
||||
int Cuda_NewFacts(predicate *pe)
|
||||
int32_t Cuda_NewFacts(predicate *pe)
|
||||
{
|
||||
dump_mat( pe->address_host_table, pe->num_rows, pe->num_columns );
|
||||
facts[cf] = pe;
|
||||
cf++;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
int Cuda_NewRule(predicate *pe)
|
||||
int32_t Cuda_NewRule(predicate *pe)
|
||||
{
|
||||
dump_vec( pe->address_host_table, pe->num_rows);
|
||||
rules[cr] = pe;
|
||||
cr++;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
int Cuda_Erase(predicate *pe)
|
||||
int32_t Cuda_Erase(predicate *pe)
|
||||
{
|
||||
if (pe->address_host_table)
|
||||
free( pe->address_host_table );
|
||||
@ -90,15 +92,15 @@ int Cuda_Erase(predicate *pe)
|
||||
static int
|
||||
load_facts( void ) {
|
||||
|
||||
int nrows = YAP_IntOfTerm(YAP_ARG1);
|
||||
int ncols = YAP_IntOfTerm(YAP_ARG2), i = 0;
|
||||
int32_t nrows = YAP_IntOfTerm(YAP_ARG1);
|
||||
int32_t ncols = YAP_IntOfTerm(YAP_ARG2), i = 0;
|
||||
YAP_Term t3 = YAP_ARG3;
|
||||
int *mat = (int *)malloc(sizeof(int)*nrows*ncols);
|
||||
int pname = YAP_AtomToInt(YAP_NameOfFunctor(YAP_FunctorOfTerm(YAP_HeadOfTerm(t3))));
|
||||
int32_t *mat = (int32_t *)malloc(sizeof(int32_t)*nrows*ncols);
|
||||
int32_t pname = YAP_AtomToInt(YAP_NameOfFunctor(YAP_FunctorOfTerm(YAP_HeadOfTerm(t3))));
|
||||
predicate *pred;
|
||||
|
||||
while(YAP_IsPairTerm(t3)) {
|
||||
int j = 0;
|
||||
int32_t j = 0;
|
||||
YAP_Term th = YAP_HeadOfTerm(t3);
|
||||
|
||||
for (j = 0; j < ncols; j++) {
|
||||
@ -136,28 +138,28 @@ load_facts( void ) {
|
||||
static int
|
||||
load_rule( void ) {
|
||||
// maximum of 2K symbols per rule, should be enough for ILP
|
||||
int vec[2048], *ptr = vec, *nvec;
|
||||
int32_t vec[2048], *ptr = vec, *nvec;
|
||||
// qK different variables;
|
||||
YAP_Term vars[1024];
|
||||
int nvars = 0;
|
||||
int ngoals = YAP_IntOfTerm(YAP_ARG1); /* gives the number of goals */
|
||||
int ncols = YAP_IntOfTerm(YAP_ARG2);
|
||||
int32_t nvars = 0;
|
||||
int32_t ngoals = YAP_IntOfTerm(YAP_ARG1); /* gives the number of goals */
|
||||
int32_t ncols = YAP_IntOfTerm(YAP_ARG2);
|
||||
YAP_Term t3 = YAP_ARG3;
|
||||
int pname = YAP_AtomToInt(YAP_NameOfFunctor(YAP_FunctorOfTerm(YAP_HeadOfTerm(t3))));
|
||||
int32_t pname = YAP_AtomToInt(YAP_NameOfFunctor(YAP_FunctorOfTerm(YAP_HeadOfTerm(t3))));
|
||||
predicate *pred;
|
||||
|
||||
while(YAP_IsPairTerm(t3)) {
|
||||
int j = 0;
|
||||
int32_t j = 0;
|
||||
YAP_Term th = YAP_HeadOfTerm(t3);
|
||||
YAP_Functor f = YAP_FunctorOfTerm( th );
|
||||
int n = YAP_ArityOfFunctor( f );
|
||||
int32_t n = YAP_ArityOfFunctor( f );
|
||||
|
||||
*ptr++ = YAP_AtomToInt( YAP_NameOfFunctor( f ) );
|
||||
for (j = 0; j < n; j++) {
|
||||
YAP_Term ta = YAP_ArgOfTerm(j+1, th);
|
||||
|
||||
if (YAP_IsVarTerm(ta)) {
|
||||
int k;
|
||||
int32_t k;
|
||||
for (k = 0; k< nvars; k++) {
|
||||
if (vars[k] == ta) {
|
||||
*ptr++ = k+1;
|
||||
@ -190,8 +192,8 @@ load_rule( void ) {
|
||||
pred->num_rows = ngoals;
|
||||
pred->num_columns = ncols;
|
||||
pred->is_fact = FALSE;
|
||||
nvec = (int *)malloc(sizeof(int)*(ptr-vec));
|
||||
memcpy(nvec, vec, sizeof(int)*(ptr-vec));
|
||||
nvec = (int32_t *)malloc(sizeof(int32_t)*(ptr-vec));
|
||||
memcpy(nvec, vec, sizeof(int32_t)*(ptr-vec));
|
||||
pred->address_host_table = nvec;
|
||||
Cuda_NewRule( pred );
|
||||
return YAP_Unify(YAP_ARG4, YAP_MkIntTerm((YAP_Int)pred));
|
||||
@ -207,19 +209,19 @@ cuda_erase( void )
|
||||
static int
|
||||
cuda_eval( void )
|
||||
{
|
||||
int *mat;
|
||||
int32_t *mat;
|
||||
predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
|
||||
int n = Cuda_Eval( ptr, & mat);
|
||||
int ncols = ptr->num_columns;
|
||||
int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat);
|
||||
int32_t ncols = ptr->num_columns;
|
||||
YAP_Term out = YAP_TermNil();
|
||||
YAP_Functor f = YAP_MkFunctor(YAP_IntToAtom(ptr->name), ncols);
|
||||
YAP_Term vec[256];
|
||||
int i;
|
||||
int32_t i;
|
||||
|
||||
if (n < 0)
|
||||
return FALSE;
|
||||
for (i=0; i<n; i++) {
|
||||
int ni = ((n-1)-i)*ncols, j;
|
||||
int32_t ni = ((n-1)-i)*ncols, j;
|
||||
for (j=0; j<ncols; j++) {
|
||||
vec[i] = YAP_MkIntTerm(mat[ni+j]);
|
||||
}
|
||||
@ -230,9 +232,9 @@ cuda_eval( void )
|
||||
|
||||
static int cuda_count( void )
|
||||
{
|
||||
int *mat;
|
||||
int32_t *mat;
|
||||
predicate *ptr = (predicate *)YAP_IntOfTerm(YAP_ARG1);
|
||||
int n = Cuda_Eval( ptr, & mat);
|
||||
int32_t n = Cuda_Eval(facts, cf, rules, cr, ptr, & mat);
|
||||
|
||||
if (n < 0)
|
||||
return FALSE;
|
||||
|
1224
packages/cuda/lista.cu
Normal file
1224
packages/cuda/lista.cu
Normal file
File diff suppressed because it is too large
Load Diff
482
packages/cuda/memory.cu
Normal file
482
packages/cuda/memory.cu
Normal file
@ -0,0 +1,482 @@
|
||||
#include <list>
|
||||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <algorithm>
|
||||
#include <thrust/device_vector.h>
|
||||
#include "lista.h"
|
||||
#include "memory.h"
|
||||
|
||||
#define MAX_REC 200
|
||||
#define HALF_REC (MAX_REC / 2)
|
||||
#define MAX_FIX_POINTS 100
|
||||
|
||||
unsigned int avmem;
|
||||
memnode temp_storage[MAX_REC];
|
||||
list<memnode> GPUmem;
|
||||
list<memnode> CPUmem;
|
||||
|
||||
bool compareiteration(const memnode &r1, const memnode &r2)
|
||||
{
|
||||
return (r1.iteration < r2.iteration);
|
||||
}
|
||||
|
||||
bool comparename(const memnode &r1, const memnode &r2)
|
||||
{
|
||||
return (r1.name > r2.name);
|
||||
}
|
||||
|
||||
void calcular_mem(int dev)
|
||||
{
|
||||
cudaDeviceProp p;
|
||||
cudaGetDeviceProperties(&p, dev);
|
||||
avmem = p.totalGlobalMem;
|
||||
temp_storage[0].dev_address = NULL;
|
||||
temp_storage[0].size = 0;
|
||||
temp_storage[HALF_REC].dev_address = NULL;
|
||||
temp_storage[HALF_REC].size = 0;
|
||||
|
||||
//cout << "Initial memory available " << avmem << endl;
|
||||
}
|
||||
|
||||
template<class InputIterator>
|
||||
InputIterator buscarhecho(InputIterator first, InputIterator last, int name)
|
||||
{
|
||||
while(first!=last)
|
||||
{
|
||||
if(first->name == name) return first;
|
||||
++first;
|
||||
}
|
||||
return last;
|
||||
}
|
||||
|
||||
list<memnode>::iterator buscarpornombre(int name, int itr, int *totalrows, int *gpunum)
|
||||
{
|
||||
int x = 1, sum = 0;
|
||||
memnode temp;
|
||||
temp.name = name;
|
||||
temp.iteration = itr;
|
||||
pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(GPUmem.begin(), GPUmem.end(), temp, compareiteration);
|
||||
while(rec.first != rec.second)
|
||||
{
|
||||
|
||||
//cout << "itr = " << itr << " rec.first = " << rec.first->name << endl;
|
||||
|
||||
if(rec.first->name == name)
|
||||
{
|
||||
temp_storage[x] = *rec.first;
|
||||
rec.first = GPUmem.erase(rec.first);
|
||||
sum += temp_storage[x].rows;
|
||||
x++;
|
||||
}
|
||||
else
|
||||
rec.first++;
|
||||
}
|
||||
//if(x > 1)
|
||||
rec.first = GPUmem.insert(rec.first, temp);
|
||||
*totalrows = sum;
|
||||
*gpunum = x;
|
||||
return rec.first;
|
||||
}
|
||||
|
||||
int buscarpornombrecpu(int name, int itr, int *totalrows)
|
||||
{
|
||||
int x = HALF_REC + 1, sum = 0;
|
||||
memnode temp;
|
||||
temp.iteration = itr;
|
||||
pair<list<memnode>::iterator, list<memnode>::iterator> rec = equal_range(CPUmem.begin(), CPUmem.end(), temp, compareiteration);
|
||||
|
||||
/*if(rec.first != rec.second)
|
||||
cout << "bscnomcpu = " << rec.first->name << " " << rec.first->iteration << endl;*/
|
||||
|
||||
while(rec.first != rec.second)
|
||||
{
|
||||
if(rec.first->name == name)
|
||||
{
|
||||
temp_storage[x] = *rec.first;
|
||||
rec.first = CPUmem.erase(rec.first);
|
||||
sum += temp_storage[x].rows;
|
||||
x++;
|
||||
}
|
||||
else
|
||||
rec.first++;
|
||||
}
|
||||
*totalrows += sum;
|
||||
return x;
|
||||
}
|
||||
|
||||
void limpiar()
|
||||
{
|
||||
list<memnode>::iterator ini;
|
||||
memnode temp;
|
||||
|
||||
if(GPUmem.size() == 0)
|
||||
{
|
||||
cerr << "Not enough GPU memory: have " << avmem << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
ini = GPUmem.begin();
|
||||
if(ini->isrule)
|
||||
{
|
||||
temp = *ini;
|
||||
temp.dev_address = (int *)malloc(ini->size);
|
||||
cudaMemcpyAsync(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
|
||||
CPUmem.push_back(temp);
|
||||
}
|
||||
liberar(ini->dev_address, ini->size);
|
||||
GPUmem.erase(ini);
|
||||
}
|
||||
|
||||
void limpiartodo(int *p1, int *p2)
|
||||
{
|
||||
list<memnode>::iterator ini;
|
||||
memnode temp;
|
||||
int cont = 0;
|
||||
if(p1 != NULL)
|
||||
cont++;
|
||||
if(p2 != NULL)
|
||||
cont++;
|
||||
ini = GPUmem.begin();
|
||||
|
||||
/*cout << "ANTES" << endl;
|
||||
mostrar_memoria();
|
||||
mostrar_memcpu();
|
||||
cout << "FIN ANTES" << endl;*/
|
||||
//cout << "mem = " << GPUmem.size() << " " << avmem << endl;
|
||||
|
||||
while(GPUmem.size() > cont)
|
||||
{
|
||||
if(ini->dev_address == p1 || ini->dev_address == p2)
|
||||
{
|
||||
ini++;
|
||||
continue;
|
||||
}
|
||||
if(ini->isrule)
|
||||
{
|
||||
temp = *ini;
|
||||
temp.dev_address = (int *)malloc(ini->size);
|
||||
cudaMemcpy(temp.dev_address, ini->dev_address, temp.size, cudaMemcpyDeviceToHost);
|
||||
CPUmem.push_back(temp);
|
||||
}
|
||||
liberar(ini->dev_address, temp.size);
|
||||
ini = GPUmem.erase(ini);
|
||||
}
|
||||
|
||||
/*cout << "DESPUES" << endl;
|
||||
mostrar_memoria();
|
||||
mostrar_memcpu();
|
||||
cout << "FIN DESPUES" << endl;*/
|
||||
//cout << "memfinal = " << GPUmem.size() << " " << avmem << endl;
|
||||
|
||||
}
|
||||
|
||||
void liberar(int *ptr, int size)
|
||||
{
|
||||
//cout << "L " << avmem << " " << size;
|
||||
|
||||
cudaFree(ptr);
|
||||
avmem += size;
|
||||
|
||||
//cout << " " << avmem << endl;
|
||||
}
|
||||
|
||||
void reservar(int **ptr, int size)
|
||||
{
|
||||
//cout << "R " << avmem << " " << size;
|
||||
|
||||
while(avmem < size)
|
||||
limpiar();
|
||||
while(cudaMalloc(ptr, size) == cudaErrorMemoryAllocation)
|
||||
limpiar();
|
||||
avmem -= size;
|
||||
|
||||
//cout << " " << avmem << endl;
|
||||
}
|
||||
|
||||
void registrar(int name, int num_columns, int *ptr, int rows, int itr, int rule)
|
||||
{
|
||||
memnode temp;
|
||||
temp.name = name;
|
||||
temp.dev_address = ptr;
|
||||
temp.rows = rows;
|
||||
temp.size = rows * num_columns * sizeof(int);
|
||||
temp.iteration = itr;
|
||||
temp.isrule = rule;
|
||||
GPUmem.push_back(temp);
|
||||
}
|
||||
|
||||
template<class InputIterator>
|
||||
void actualizar(int num_columns, int *ptr, int rows, InputIterator i)
|
||||
{
|
||||
i->dev_address = ptr;
|
||||
i->rows = rows;
|
||||
i->size = rows * num_columns * sizeof(int);
|
||||
}
|
||||
|
||||
/* Total number of rows stored for predicate `name` at iteration `itr`,
   summed over both GPU-resident and CPU-spilled fragments.
   NOTE(review): equal_range() requires both lists to be ordered by
   iteration (compareiteration) — confirm callers keep them sorted. */
int numrows(int name, int itr)
{
	int total = 0;
	memnode key;
	key.iteration = itr;

	pair<list<memnode>::iterator, list<memnode>::iterator> range = equal_range(GPUmem.begin(), GPUmem.end(), key, compareiteration);
	for(; range.first != range.second; range.first++)
	{
		if(range.first->name == name)
			total += range.first->rows;
	}

	range = equal_range(CPUmem.begin(), CPUmem.end(), key, compareiteration);
	for(; range.first != range.second; range.first++)
	{
		if(range.first->name == name)
			total += range.first->rows;
	}

	return total;
}
|
||||
|
||||
/* Make predicate `name` available on the GPU for iteration `itr`;
   returns its row count and stores the device address in *ptr.
   Facts (is_fact != 0): reuse the cached copy if present (re-tagging it
   with the current iteration), otherwise upload address_host_table.
   Derived predicates (itr > 0): gather the fragments produced at
   iteration itr-1 from GPU (buscarpornombre) and CPU
   (buscarpornombrecpu) — which apparently fill the global temp_storage
   array — into one freshly reserved buffer.
   NOTE(review): `temp + temp_storage[x-1].size` advances an int* by what
   looks like a byte count, and the offset is not accumulated across
   fragments; unless temp_storage[].size holds running element offsets,
   fragments after the first land at the wrong position — verify against
   the buscarpornombre* implementations.
   NOTE(review): cudaMemcpyAsync on the default stream is followed by
   liberar()/free() of the source buffers with no synchronization —
   confirm the copies complete before the sources are reused/freed. */
int cargar(int name, int num_rows, int num_columns, int is_fact, int *address_host_table, int **ptr, int itr)
{
	int numgpu, numcpu, totalrows = 0;
	int *temp, x;
	int size, itrant;
	list<memnode>::iterator i;
	memnode fact;
	if(is_fact)
	{
		/* Facts are immutable: if already on the GPU, just refresh the
		   iteration tag and hand back the cached buffer. */
		i = buscarhecho(GPUmem.begin(), GPUmem.end(), name);
		if(i != GPUmem.end())
		{
			fact = *i;
			GPUmem.erase(i);
			fact.iteration = itr;
			*ptr = fact.dev_address;
			GPUmem.push_back(fact);
			return fact.rows;
		}
		/* First use: upload the host-side fact table. */
		size = num_rows * num_columns * sizeof(int);
		reservar(&temp, size);
		cudaMemcpyAsync(temp, address_host_table, size, cudaMemcpyHostToDevice);
		registrar(name, num_columns, temp, num_rows, itr, 0);
		*ptr = temp;
		return num_rows;
	}
	if(itr > 0)
	{
		itrant = itr - 1;
		i = buscarpornombre(name, itrant, &totalrows, &numgpu);
		numcpu = buscarpornombrecpu(name, itrant, &totalrows);

		/* Fast path — NOTE(review): numgpu == 2 appears to mean "exactly
		   one GPU fragment" (temp_storage filling starts at index 1) and
		   numcpu == HALF_REC + 1 "no CPU fragments"; confirm. */
		if((numgpu == 2) && (numcpu == (HALF_REC + 1)))
		{
			actualizar(num_columns, temp_storage[1].dev_address, temp_storage[1].rows, i);
			*ptr = temp_storage[1].dev_address;
			return temp_storage[1].rows;
		}
		/* Slow path: concatenate all fragments into one new buffer. */
		size = totalrows * num_columns * sizeof(int);
		reservar(&temp, size);
		for(x = 1; x < numgpu; x++)
		{
			cudaMemcpyAsync(temp + temp_storage[x-1].size, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyDeviceToDevice);
			liberar(temp_storage[x].dev_address, temp_storage[x].size);
		}
		for(x = HALF_REC + 1; x < numcpu; x++)
		{
			cudaMemcpyAsync(temp + temp_storage[x-1].size, temp_storage[x].dev_address, temp_storage[x].size, cudaMemcpyHostToDevice);
			free(temp_storage[x].dev_address);
		}
		actualizar(num_columns, temp, totalrows, i);
		*ptr = temp;
		return totalrows;
	}
	return 0;
}
|
||||
|
||||
/* Concatenate every fragment of predicate `name` (all iterations) —
   GPU-resident fragments first, then CPU-spilled ones — into a single
   new device buffer. *ptr receives the buffer; the total row count is
   returned.
   NOTE(review): the source registry entries remain in GPUmem/CPUmem and
   still point at their old buffers after the copy — confirm the caller
   (e.g. resultados()) owns the cleanup. */
int cargafinal(int name, int cols, int **ptr)
{
	int *temp, *ini, cont = 0;
	memnode bus;
	bus.name = name;
	/* Sort by name so all fragments of `name` form one contiguous run
	   reachable via lower_bound. */
	GPUmem.sort(comparename);
	CPUmem.sort(comparename);
	list<memnode>::iterator endg = GPUmem.end();
	list<memnode>::iterator endc = CPUmem.end();
	list<memnode>::iterator pos = lower_bound(GPUmem.begin(), endg, bus, comparename);
	list<memnode>::iterator gpu = pos;
	/* First pass: count total rows to size the destination buffer. */
	while(pos != endg && pos->name == name)
	{
		cont += pos->rows;
		pos++;
	}
	pos = lower_bound(CPUmem.begin(), endc, bus, comparename);
	list<memnode>::iterator cpu = pos;
	while(pos != endc && pos->name == name)
	{
		cont += pos->rows;
		pos++;
	}

	reservar(&temp, cont * cols * sizeof(int));
	ini = temp;

	/* Second pass: copy each fragment, advancing temp by whole ints. */
	pos = gpu;
	while(pos != endg && pos->name == name)
	{
		cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyDeviceToDevice);
		temp += pos->size / sizeof(int);
		pos++;
	}
	pos = cpu;
	while(pos != endc && pos->name == name)
	{
		cudaMemcpy(temp, pos->dev_address, pos->size, cudaMemcpyHostToDevice);
		temp += pos->size / sizeof(int);
		pos++;
	}

	/*int x, y;
	int *hop1 = (int *)malloc(cont * cols * sizeof(int));
	cudaMemcpy(hop1, ini, cont * cols * sizeof(int), cudaMemcpyDeviceToHost);
	cout << "select finala" << endl;
	for(x = 0; x < cont; x++)
	{
		for(y = 0; y < cols; y++)
			cout << hop1[x * cols + y] << " ";
		cout << endl;
	}
	cout << "select finala" << endl;*/

	*ptr = ini;
	return cont;
}
|
||||
|
||||
/* Fixpoint test: returns true when relation `name` has stopped growing,
   i.e. some earlier iteration (looking back at most MAX_FIX_POINTS
   steps) has the same row count AND equal content as iteration `itr`.
   NOTE(review): thrust::equal(pt1, pt1 + r1, pt2) compares r1 *ints*,
   while cargar() returns a *row* count and each row has `cols` ints —
   it looks like this should be pt1 + r1 * cols; verify intended
   semantics before changing. */
bool generadas(int name, int filas, int cols, int itr)
{
	int r1, r2, x, fin;
	int *dop1, *dop2;

	r2 = numrows(name, itr);
	/* Clamp the look-back window to MAX_FIX_POINTS iterations. */
	if(itr < MAX_FIX_POINTS)
		fin = itr;
	else
		fin = MAX_FIX_POINTS;
	for(x = 1; x <= fin; x++)
	{
		r1 = numrows(name, itr - x);
		if(r1 == r2)
		{
			/* Same cardinality: materialize both versions on the GPU and
			   compare contents element-wise. */
			r2 = cargar(name, filas, cols, 0, NULL, &dop2, itr + 1);
			thrust::device_ptr<int> pt2 = thrust::device_pointer_cast(dop2);
			r1 = cargar(name, filas, cols, 0, NULL, &dop1, itr - x + 1);
			thrust::device_ptr<int> pt1 = thrust::device_pointer_cast(dop1);

			/*int y;
			int *a = (int *)malloc(r1 * cols * sizeof(int));
			cudaMemcpy(a, dop1, r1 * cols * sizeof(int), cudaMemcpyDeviceToHost);
			for(x = 0; x < r1; x++)
			{
				for(y = 0; y < cols; y++)
					cout << a[x * cols + y] << " ";
			}
			cout << endl;
			cudaMemcpy(a, dop2, r1 * cols * sizeof(int), cudaMemcpyDeviceToHost);
			for(x = 0; x < r1; x++)
			{
				for(y = 0; y < cols; y++)
					cout << a[x * cols + y] << " ";
			}
			cout << endl;
			free(a);*/

			if(thrust::equal(pt1, pt1 + r1, pt2) == true)
				return true;
		}
	}

	return false;
}
|
||||
|
||||
/* Debug dump: list every relation currently registered in GPU memory
   (name, iteration tag, byte size). */
void mostrar_memoria()
{
	list<memnode>::iterator it;
	cout << "Memoria inicio GPU" << endl;
	for(it = GPUmem.begin(); it != GPUmem.end(); it++)
		cout << it->name << " " << it->iteration << " " << it->size << endl;
	cout << "Memoria fin GPU" << endl;
}
|
||||
|
||||
/* Debug dump: list every relation spilled to CPU memory
   (name and iteration tag only). */
void mostrar_memcpu()
{
	list<memnode>::iterator it;
	cout << "Memoria inicio CPU" << endl;
	for(it = CPUmem.begin(); it != CPUmem.end(); it++)
		cout << it->name << " " << it->iteration << endl;
	cout << "Memoria fin CPU" << endl;
}
|
||||
|
||||
/* Print every tuple of the result predicates in [first, last) to stdout
   and free their device/host buffers; the grand total of tuples is
   printed last.
   NOTE(review): the inner loops advance gpu/cpu while names match but
   never compare against GPUmem.end()/CPUmem.end() — this relies on the
   name ordering acting as a sentinel; verify, since running past the
   end is undefined behaviour.
   NOTE(review): freed entries stay registered in GPUmem/CPUmem with
   dangling addresses — presumably this only runs at shutdown; confirm. */
void resultados(vector<rulenode>::iterator first, vector<rulenode>::iterator last)
{
	/* Sort both registries by name so each predicate's fragments are
	   contiguous and can be consumed in one sweep. */
	GPUmem.sort(comparename);
	CPUmem.sort(comparename);
	list<memnode>::iterator gpu = GPUmem.begin();
	list<memnode>::iterator cpu = CPUmem.begin();
	int x, y, of, cols;
	int *temp, cont = 0;
	while(first != last)
	{
		/* GPU-resident fragments: copy to host, print, free device side. */
		while(first->name == gpu->name)
		{
			temp = (int *)malloc(gpu->size);
			cudaMemcpy(temp, gpu->dev_address, gpu->size, cudaMemcpyDeviceToHost);
			/* Column count recovered from the stored byte size. */
			cols = gpu->size / (gpu->rows * sizeof(int));
			cont += gpu->rows;
			for(x = 0, of = 0; x < gpu->rows; x++)
			{
				for(y = 0; y < cols; y++, of++)
					cout << temp[of] << " ";
				cout << endl;
			}
			cudaFree(gpu->dev_address);
			free(temp);
			gpu++;
		}
		/* CPU-spilled fragments: dev_address is actually host memory here. */
		while(first->name == cpu->name)
		{
			cols = cpu->size / (cpu->rows * sizeof(int));
			cont += cpu->rows;
			for(x = 0, of = 0; x < cpu->rows; x++)
			{
				for(y = 0; y < cols; y++, of++)
					cout << cpu->dev_address[of] << " ";
				cout << endl;
			}
			free(cpu->dev_address);
			cpu++;
		}
		first++;
	}
	cout << cont << endl;
}
|
||||
|
||||
/*device_vector<int> reservar_vector(int size)
|
||||
{
|
||||
limpiar(size * sizeof(int));
|
||||
device_vector<int> ret(size);
|
||||
return ret;
|
||||
}*/
|
||||
/*
|
||||
void reservar_resultado(InputIterator req, int *ptr, int size)
|
||||
{
|
||||
limpiar(size);
|
||||
memnode temp;
|
||||
temp.name = req->name;
|
||||
temp.size = size;
|
||||
cudaMalloc(&temp.dev_address, size);
|
||||
temp.in_use = 1;
|
||||
GPUmem.push_back(temp);
|
||||
avmem -= size;
|
||||
ptr = temp.dev_address;
|
||||
}*/
|
289
packages/cuda/selectproyect.cu
Normal file
289
packages/cuda/selectproyect.cu
Normal file
@ -0,0 +1,289 @@
|
||||
#include <thrust/device_vector.h>
|
||||
//#include <thrust/device_ptr.h>
|
||||
#include <thrust/scan.h>
|
||||
#include <stdlib.h>
|
||||
#include "memory.h"
|
||||
|
||||
/* Selection kernel: sets res[row] = 1 for every row of dop1 that matches
   all (column, constant) pairs packed in cons (numc ints total, read in
   pairs below). Launch with numc * sizeof(int) bytes of dynamic shared
   memory; res must be zero-initialised by the caller. */
__global__ void marcar(int *dop1, int rows, int cols, int *cons, int numc, int *res) /*a libreria*/
{
	extern __shared__ int shared[];
	int row = blockIdx.x * blockDim.x + threadIdx.x;
	int base, k;

	/* Stage the (column, constant) pairs in shared memory. */
	if(threadIdx.x < numc)
		shared[threadIdx.x] = cons[threadIdx.x];
	__syncthreads();

	if(row >= rows)
		return;
	base = row * cols;
	for(k = 0; k < numc; k += 2)
	{
		/* shared[k] = column offset, shared[k+1] = required constant. */
		if(dop1[base + shared[k]] != shared[k + 1])
			return;
	}
	res[row] = 1;
}
|
||||
|
||||
/* Refinement selection kernel: clears res[id] (previously set by another
   pass) for rows that fail the constant tests; rows already at 0 are
   skipped.
   cons holds 2*numc ints — inferred layout: numc comparison values
   first, then numc column offsets (spos) — NOTE(review): confirm this
   layout against the callers.
   Launch with 2 * numc * sizeof(int) bytes of dynamic shared memory. */
__global__ void marcar2(int *dop1, int rows, int cols, int *cons, int numc, int *res) /*a libreria*/
{
	extern __shared__ int shared[];
	/* Second half of the staged block: the column offsets. */
	int *spos = &shared[numc];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int x, rowact, posact;
	if(threadIdx.x < (numc * 2))
		shared[threadIdx.x] = cons[threadIdx.x];
	__syncthreads();
	if(id < rows)
	{
		/* Row already rejected by an earlier pass. */
		if(res[id] == 0)
			return;
		rowact = id * cols;
		for(x = 0; x < numc; x++)
		{
			posact = rowact + spos[x];
			if(dop1[posact] != shared[x])
			{
				res[id] = 0;
				return;
			}
		}
	}
}
|
||||
|
||||
/* Self-join filter: dhead (cont ints, staged in shared memory) encodes
   groups of column offsets, each group terminated by a negative value
   (the inner loop stops at shared[y] <= -1). All columns in a group must
   hold equal values, otherwise res[id] is cleared; rows already at 0 are
   skipped.
   NOTE(review): the inner loop reads shared[y] until it finds a negative
   sentinel, so the final group must also be terminated within the cont
   staged ints — confirm callers build dhead that way.
   Launch with cont * sizeof(int) bytes of dynamic shared memory. */
__global__ void samejoin(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int temp, temp2, pos, x, y;
	if(threadIdx.x < cont)
		shared[threadIdx.x] = dhead[threadIdx.x];
	__syncthreads();
	if(id < rows)
	{
		if(res[id] == 0)
			return;
		pos = id * cols;
		for(x = 0; x < cont; x++)
		{
			/* temp = first column of the group; compare the rest to it. */
			temp = shared[x];
			y = x + 1;
			temp2 = shared[y];
			while(temp2 > -1)
			{
				if(dop1[temp+pos] != dop1[temp2+pos])
				{
					res[id] = 0;
					return;
				}
				y++;
				temp2 = shared[y];
			}
			/* Skip past this group's sentinel (x++ in the for loop). */
			x = y;
		}
	}
}
|
||||
|
||||
/* Stand-alone self-join selection: same group encoding as samejoin()
   (column-offset groups in dhead, each terminated by a negative value),
   but instead of clearing pre-set flags it sets res[id] = 1 only for
   rows whose groups are all internally equal; res must be
   zero-initialised by the caller.
   Launch with cont * sizeof(int) bytes of dynamic shared memory. */
__global__ void samejoin2(int *dop1, int rows, int cols, int *dhead, int cont, int *res)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int temp, temp2, pos, x, y;
	if(threadIdx.x < cont)
		shared[threadIdx.x] = dhead[threadIdx.x];
	__syncthreads();
	if(id < rows)
	{
		pos = id * cols;
		for(x = 0; x < cont; x++)
		{
			/* temp = first column of the group; compare the rest to it. */
			temp = shared[x];
			y = x + 1;
			temp2 = shared[y];
			while(temp2 > -1)
			{
				/* Any mismatch disqualifies the row (res[id] stays 0). */
				if(dop1[temp+pos] != dop1[temp2+pos])
					return;
				y++;
				temp2 = shared[y];
			}
			/* Skip past this group's sentinel (x++ in the for loop). */
			x = y;
		}
		res[id] = 1;
	}
}
|
||||
|
||||
/* Projection kernel: writes result row `row` as the hsize columns of
   dop1 listed in dhead (staged in shared memory).
   Launch with hsize * sizeof(int) bytes of dynamic shared memory. */
__global__ void proyectar(int *dop1, int rows, int cols, int *dhead, int hsize, int *res)
{
	extern __shared__ int shared[];
	int row = blockIdx.x * blockDim.x + threadIdx.x;
	int src, dst, k;

	if(threadIdx.x < hsize)
		shared[threadIdx.x] = dhead[threadIdx.x];
	__syncthreads();

	if(row >= rows)
		return;
	src = row * cols;
	dst = row * hsize;
	for(k = 0; k < hsize; k++)
		res[dst + k] = dop1[src + shared[k]];
}
|
||||
|
||||
/* Compacting projection kernel: temp is the inclusive scan of per-row
   selection flags (temp[0] == 0), so row `id` was selected iff
   temp[id] != temp[id+1], and its output slot is temp[id+1] - 1.
   dhead lists the hsize source columns.
   Launch with hsize * sizeof(int) bytes of dynamic shared memory. */
__global__ void llenarproyectar(int *dop1, int rows, int cols, int *temp, int *dhead, int hsize, int *res)
{
	extern __shared__ int shared[];
	int id = blockIdx.x * blockDim.x + threadIdx.x;
	int pos, posr, x;
	/* BUG FIX: the guard used `threadIdx.x < cols`, but only hsize ints
	   exist in dhead and only hsize * sizeof(int) bytes of shared memory
	   are reserved at launch (see selectproyect); with cols > hsize this
	   read past dhead and wrote past the shared allocation. */
	if(threadIdx.x < hsize)
		shared[threadIdx.x] = dhead[threadIdx.x];
	__syncthreads();
	if(id < rows)
	{
		posr = temp[id+1];
		/* Selected rows are those where the scan value advanced. */
		if(temp[id] != posr && posr > 0)
		{
			pos = id * cols;
			posr = (posr - 1) * hsize;
			for(x = 0; x < hsize; x++, posr++)
				res[posr] = dop1[pos+shared[x]];
		}
	}
}
|
||||
|
||||
/*__global__ void removedup()
|
||||
{
|
||||
extern __shared__ int shared[];
|
||||
int id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if(threadIdx.x < cols)
|
||||
shared[threadIdx.x] = dhead[threadIdx.x];
|
||||
if(id < rows)
|
||||
{
|
||||
|
||||
}
|
||||
}*/
|
||||
|
||||
/* Combining functor (host and device). Case table (identical to the
   original implementation):
     r1 > -1,  r2 > 0  : r1 + r2
     r1 > -1,  r2 <= 0 : -r1
     r1 <= -1, r2 > 0  : abs(r1) + r2
     r1 <= -1, r2 <= 0 : r1
   NOTE(review): the meaning of the negative-value encoding is not
   visible in this file — document it at the call site. */
template<typename T> /*a libreria*/
struct suma : public binary_function<T,T,T>
{
	__host__ __device__
	T operator()(const T &r1, const T &r2)
	{
		if(r2 > 0)
			return (r1 > -1) ? (r1 + r2) : (abs(r1) + r2);
		return (r1 > -1) ? -r1 : r1;
	}
};
|
||||
|
||||
/* Return the largest of three ints. */
int mayor(int a, int b, int c)
{
	int m = (a > b) ? a : b;
	return (m > c) ? m : c;
}
|
||||
|
||||
/* Apply constant selections, self-join equality checks and the final
   projection to relation dop1 (rows x cols ints, device memory); the
   projected result (result_rows x head_size) is placed in a freshly
   reserved device buffer returned through *ret.
     select   : numselect ints — (column, constant) pairs for marcar().
     selfjoin : numselfj ints — column groups (negative-terminated) for
                samejoin()/samejoin2().
     project  : head_size ints — output column order.
   Returns the number of result rows; 0 means empty result and *ret is
   left untouched. */
int selectproyect(int *dop1, int rows, int cols, int head_size, int *select, int numselect, int *selfjoin, int numselfj, int *project, int **ret)
{
	int *fres = NULL, *temp = NULL;
	int *dhead = NULL, tmplen;
	int size, size2, num;
	thrust::device_ptr<int> res;

	/* dhead is reused for the selection pairs, the self-join spec and the
	   projection list, so reserve the largest of the three. */
	int head_bytes = mayor(numselect, numselfj, head_size) * sizeof(int);
	reservar(&dhead, head_bytes);

	int blockllen = rows / 1024 + 1;
	int numthreads = 1024;

	if(numselect > 0)
	{
		/* temp[1..rows] holds a 0/1 flag per input row; an inclusive scan
		   then turns the flags into output positions (temp[0] stays 0). */
		tmplen = rows + 1;
		size2 = tmplen * sizeof(int);
		reservar(&temp, size2);
		cudaMemset(temp, 0, size2);

		size = numselect * sizeof(int);
		cudaMemcpy(dhead, select, size, cudaMemcpyHostToDevice);
		marcar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselect, temp + 1);

		if(numselfj > 0)
		{
			size = numselfj * sizeof(int);
			cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice);
			samejoin<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);
		}

		res = thrust::device_pointer_cast(temp);
		thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
		num = res[rows];
		if(num == 0)
		{
			/* BUG FIX: dhead and temp were leaked on the empty-result
			   path, permanently shrinking the avmem accounting. */
			liberar(dhead, head_bytes);
			liberar(temp, size2);
			return 0;
		}

		size = head_size * sizeof(int);
		reservar(&fres, num * size);
		cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
		llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
		liberar(dhead, head_bytes);
		liberar(temp, size2);
		*ret = fres;
		return num;
	}
	else
	{
		if(numselfj > 0)
		{
			/* Same flag/scan/compact pipeline, seeded by samejoin2()
			   (which sets flags itself instead of refining marcar's). */
			tmplen = rows + 1;
			size2 = tmplen * sizeof(int);
			reservar(&temp, size2);
			cudaMemset(temp, 0, size2);

			size = numselfj * sizeof(int);
			cudaMemcpy(dhead, selfjoin, size, cudaMemcpyHostToDevice);
			samejoin2<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, numselfj, temp + 1);

			res = thrust::device_pointer_cast(temp);
			thrust::inclusive_scan(res + 1, res + tmplen, res + 1);
			num = res[rows];
			if(num == 0)
			{
				/* BUG FIX: same leak as above on the empty-result path. */
				liberar(dhead, head_bytes);
				liberar(temp, size2);
				return 0;
			}

			size = head_size * sizeof(int);
			reservar(&fres, num * size);
			cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
			llenarproyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, temp, dhead, head_size, fres);
			liberar(dhead, head_bytes);
			liberar(temp, size2);
			*ret = fres;
			return num;
		}
		else
		{
			/* Projection only: every row survives. */
			size = head_size * sizeof(int);
			reservar(&fres, rows * size);
			cudaMemcpy(dhead, project, size, cudaMemcpyHostToDevice);
			proyectar<<<blockllen, numthreads, size>>>(dop1, rows, cols, dhead, head_size, fres);
			liberar(dhead, head_bytes);
			*ret = fres;
			return rows;
		}
	}
}
|
@ -5,7 +5,8 @@
|
||||
|
||||
main :-
|
||||
cuda_extensional(db/2, _X),
|
||||
cuda_rule((a(X, Y) :- db(Y, Z), db(X, Z), db(1, Z) ), _).
|
||||
cuda_rule((a(X, Y) :- db(Y, Z), db(X, Z), db(1, Z) ), Q),
|
||||
cuda_eval(Q, _).
|
||||
|
||||
db(1,a).
|
||||
db(2,a).
|
||||
|
1170
packages/cuda/treeb.cu
Executable file
1170
packages/cuda/treeb.cu
Executable file
File diff suppressed because it is too large
Load Diff
193
packages/cuda/union2.cu
Normal file
193
packages/cuda/union2.cu
Normal file
@ -0,0 +1,193 @@
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/unique.h>
|
||||
#include <thrust/distance.h>
|
||||
#include <iostream>
|
||||
|
||||
/* POD wrapper for a 2-column tuple so thrust can sort/unique whole rows
   as single elements (see unir(), case 2). */
typedef struct n2
{
	int v[2];
}s2;
|
||||
|
||||
/* POD wrapper for a 3-column tuple so thrust can sort/unique whole rows
   as single elements (see unir(), case 3). */
typedef struct n3
{
	int v[3];
}s3;
|
||||
|
||||
/* Equality predicate for 2-column tuples (used by thrust::unique). */
struct p2
{
	__host__ __device__
	bool operator()(const s2 &r1, const s2 &r2)
	{
		return r1.v[0] == r2.v[0] && r1.v[1] == r2.v[1];
	}
};
|
||||
|
||||
/* Lexicographic "greater-than" comparator for 2-column tuples (used by
   thrust::sort; yields descending order). */
struct o2
{
	__host__ __device__
	bool operator()(const s2 &r1, const s2 &r2)
	{
		if(r1.v[0] != r2.v[0])
			return r1.v[0] > r2.v[0];
		return r1.v[1] > r2.v[1];
	}
};
|
||||
|
||||
/* Equality predicate for 3-column tuples (used by thrust::unique). */
struct p3
{
	__host__ __device__
	bool operator()(const s3 &r1, const s3 &r2)
	{
		return r1.v[0] == r2.v[0] && r1.v[1] == r2.v[1] && r1.v[2] == r2.v[2];
	}
};
|
||||
|
||||
/* Lexicographic "greater-than" comparator for 3-column tuples (used by
   thrust::sort; yields descending order). */
struct o3
{
	__host__ __device__
	bool operator()(const s3 &r1, const s3 &r2)
	{
		if(r1.v[0] != r2.v[0])
			return r1.v[0] > r2.v[0];
		if(r1.v[1] != r2.v[1])
			return r1.v[1] > r2.v[1];
		return r1.v[2] > r2.v[2];
	}
};
|
||||
|
||||
/* Sort the device buffer `res` (rows tuples, each `tipo` ints wide,
   tipo in {1,2,3}) and deduplicate it in place; each sort/unique attempt
   is retried after a limpiar() eviction when thrust throws bad_alloc.
   Returns the number of distinct tuples left at the front of `res`
   (0 for an unsupported `tipo`).
   NOTE(review): the output parameter `ret` is never assigned — callers
   presumably keep using `res`; confirm, and consider removing `ret`.
   NOTE(review): each iVec copy/resize/shrink_to_fit sequence builds a
   trimmed device copy that is immediately destroyed — it looks like dead
   code (the compacted data already lives in `res`); confirm it is not a
   memory-pressure trick before removing. */
int unir(int *res, int rows, int tipo, int **ret)
{
	thrust::device_ptr<int> pt, re;
	thrust::device_ptr<s2> pt2, re2;
	thrust::device_ptr<s3> pt3, re3;
	s2 *t2;
	s3 *t3;
	int flag, nrows;

	switch(tipo)
	{
		case 1:
		{
			pt = thrust::device_pointer_cast(res);
			flag = 0;
			/* Retry until sort+unique succeed without exhausting memory. */
			while(flag != 1)
			{
				try
				{
					thrust::sort(pt, pt + rows);
					re = thrust::unique(pt, pt + rows);
					flag = 1;
				}
				catch(std::bad_alloc &e)
				{
					limpiar();
				}
			}
			nrows = thrust::distance(pt, re);
			thrust::device_vector<int> iVec(pt, pt + rows);
			iVec.resize(nrows);
			iVec.shrink_to_fit();
			return nrows;
		}
		case 2:
		{
			/* Reinterpret the flat buffer as 2-int tuples. */
			t2 = (s2*)res;

			/*int *a, x, y;
			a = (int *)malloc(rows * 2 * sizeof(int));
			cudaMemcpy(a, res, rows * 2 * sizeof(int), cudaMemcpyDeviceToHost);
			cout << "INI" << endl;
			for(x = 0; x < rows; x++)
			{
				for(y = 0; y < 2; y++)
					cout << a[x * 2 + y] << " ";
				cout << endl;
			}
			cout << "INI fin" << endl;
			free(a);*/

			pt2 = thrust::device_pointer_cast(t2);
			flag = 0;
			/* Retry until sort+unique succeed without exhausting memory. */
			while(flag != 1)
			{
				try
				{
					thrust::sort(pt2, pt2 + rows, o2());
					re2 = thrust::unique(pt2, pt2 + rows, p2());
					flag = 1;
				}
				catch(std::bad_alloc &e)
				{
					limpiar();
				}
			}
			nrows = thrust::distance(pt2, re2);
			thrust::device_vector<s2> iVec(pt2, pt2 + rows);
			iVec.resize(nrows);
			iVec.shrink_to_fit();

			/*tam = (int)(re2.get() - pt2.get());
			a = (int *)malloc(tam * 2 * sizeof(int));
			cudaMemcpy(a, res, tam * 2 * sizeof(int), cudaMemcpyDeviceToHost);
			cout << "FIN" << endl;
			for(x = 0; x < tam; x++)
			{
				for(y = 0; y < 2; y++)
					cout << a[x * 2 + y] << " ";
				cout << endl;
			}
			cout << "FIN fin" << endl;
			free(a);
			cout << "antes = " << rows << " despues = " << thrust::distance(pt2, re2) << endl;*/

			return nrows;
		}
		case 3:
		{
			/* Reinterpret the flat buffer as 3-int tuples. */
			t3 = (s3*)res;
			pt3 = thrust::device_pointer_cast(t3);
			flag = 0;
			/* Retry until sort+unique succeed without exhausting memory. */
			while(flag != 1)
			{
				try
				{
					thrust::sort(pt3, pt3 + rows, o3());
					re3 = thrust::unique(pt3, pt3 + rows, p3());
					flag = 1;
				}
				catch(std::bad_alloc &e)
				{
					limpiar();
				}
			}
			nrows = thrust::distance(pt3, re3);
			thrust::device_vector<s3> iVec(pt3, pt3 + rows);
			iVec.resize(nrows);
			iVec.shrink_to_fit();
			return nrows;
		}
	}
	return 0;
}
|
Reference in New Issue
Block a user