Skip to content

Commit

Permalink
remove unused files
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangpiu committed Aug 12, 2024
1 parent a860491 commit 3056e48
Show file tree
Hide file tree
Showing 14 changed files with 127 additions and 161 deletions.
144 changes: 5 additions & 139 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@ project(llm.cpp LANGUAGES C CXX CUDA)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CUDA_STANDARD 17)
set(BUILD_SHARED_LIBS OFF)
#add_compile_options(-Ofast -march=native)
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Ofast -march=native")
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Ofast -march=native")
# add_compile_options(-Ofast -march=native)
# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Ofast -march=native")
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Ofast -march=native")

find_program(CCACHE_PROGRAM ccache)
if (CCACHE_PROGRAM)
message("--- ccache: ${CCACHE_PROGRAM}")
set(CMAKE_C_COMPILER_LAUNCHER ccache)
set(CMAKE_CXX_COMPILER_LAUNCHER ccache)
set(CMAKE_CUDA_COMPILER_LAUNCHER ccache)
Expand All @@ -19,14 +18,6 @@ endif ()
enable_testing()
include_directories(.)

# OpenMP
find_package(OpenMP)
if (OpenMP_FOUND)
add_compile_definitions(OMP)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
endif ()

# Abseil
set(ABSL_PROPAGATE_CXX_STD ON)
add_subdirectory(third_party/abseil-cpp)
Expand All @@ -41,130 +32,5 @@ add_definitions(-DEIGEN_DONT_PARALLELIZE)
add_definitions(-DEIGEN_USE_THREADS)
include_directories(${EIGEN3_INCLUDE_DIR})

# llm.c
add_executable(train_gpt2 train_gpt2.c)
target_link_libraries(train_gpt2 m ${OpenMP_CXX_LIBRARIES})

add_executable(test_gpt2 test_gpt2.c)
target_link_libraries(test_gpt2 m ${OpenMP_CXX_LIBRARIES})

# llm.c GPU
set(CMAKE_CUDA_ARCHITECTURES 60 61 70 75)
find_package(CUDA)
if (CUDA_FOUND)
set_source_files_properties(llmcpp/pch.hpp PROPERTIES HEADER_FILE_ONLY TRUE)
add_library(pch_gpu INTERFACE)
target_sources(pch_gpu INTERFACE llmcpp/pch.hpp)

add_library(nn_gpu llmcpp/nn.cu)
target_compile_definitions(nn_gpu PUBLIC EIGEN_USE_GPU)
target_link_libraries(nn_gpu
pch_gpu
absl::strings absl::log absl::check
${CUDA_LIBRARIES}
)
target_precompile_headers(nn_gpu PUBLIC llmcpp/pch.hpp)

add_library(gpt_gpu llmcpp/gpt.cu)
target_link_libraries(gpt_gpu
nn_gpu
)
target_precompile_headers(gpt_gpu PUBLIC llmcpp/pch.hpp)

# add_compile_definitions(EIGEN_USE_GPU)
#SET(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr)
add_compile_definitions(ENABLE_FP32)
add_executable(train_gpt2cu train_gpt2.cu)
set_target_properties(train_gpt2cu PROPERTIES
CUDA_SEPARABLE_COMPILATION ON
CUDA_ARCHITECTURES "61;70;75"
)
target_link_libraries(train_gpt2cu ${CUDA_LIBRARIES} cublas cublasLt)

add_executable(test_eigen_gpu llmcpp/test_eigen_gpu.cu)
# target_compile_definitions(test_eigen_gpu PRIVATE EIGEN_USE_GPU)
set_target_properties(test_eigen_gpu PROPERTIES
CUDA_SEPARABLE_COMPILATION ON
CUDA_ARCHITECTURES "61;70;75"
)
target_link_libraries(test_eigen_gpu
absl::strings absl::log absl::check
# ${CUDA_LIBRARIES} cublas cublasLt
)
# target_compile_options(test_eigen_gpu PRIVATE -Xcompiler=-Ofast,-march=native)

# nn_test_gpu
add_executable(nn_test_gpu llmcpp/nn_test.cu)
target_link_libraries(nn_test_gpu
nn_gpu
GTest::gtest_main
)

# gpt_test_gpu
add_executable(gpt_test_gpu llmcpp/gpt_test.cu)
target_link_libraries(gpt_test_gpu
gpt_gpu
GTest::gtest_main
)

# gpt_optim_gpu
add_executable(gpt_optim_gpu llmcpp/gpt_optim.cu)
target_link_libraries(gpt_optim_gpu
gpt_gpu
)

# train_gpt2_gpu
add_executable(train_gpt2_gpu llmcpp/train_gpt2.cu)
target_link_libraries(train_gpt2_gpu
gpt_gpu
)
target_compile_options(train_gpt2_gpu PRIVATE -O3)
target_precompile_headers(train_gpt2_gpu PUBLIC llmcpp/pch.hpp)
endif ()

set_source_files_properties(llmcpp/pch.hpp PROPERTIES HEADER_FILE_ONLY TRUE)
add_library(pch INTERFACE)
target_sources(pch INTERFACE llmcpp/pch.hpp)

# llm.cpp
add_library(nn llmcpp/nn.cpp)
target_link_libraries(nn
pch
absl::strings absl::log absl::check)
target_precompile_headers(nn PUBLIC llmcpp/pch.hpp)

add_library(gpt llmcpp/gpt.cpp)
target_link_libraries(gpt nn)

add_library(gpt2 llmcpp/gpt2.cpp)
target_link_libraries(gpt2 gpt)

add_library(optim llmcpp/optim.cpp)
target_link_libraries(optim nn)

add_executable(test_gpt2_cpu llmcpp/test_gpt2.cpp)
target_link_libraries(test_gpt2_cpu gpt2 optim)
target_compile_options(test_gpt2_cpu PRIVATE -Ofast -march=native)

add_executable(train_gpt2_cpu llmcpp/train_gpt2.cpp)
target_link_libraries(train_gpt2_cpu
gpt2 optim
profiler
)
target_compile_options(train_gpt2_cpu PRIVATE -Ofast -march=native)

add_executable(nn_test llmcpp/nn_test.cpp)
target_link_libraries(nn_test nn GTest::gtest_main)

add_executable(optim_test llmcpp/optim_test.cpp)
target_link_libraries(optim_test nn GTest::gtest_main)

add_executable(gpt_test llmcpp/gpt_test.cpp)
target_link_libraries(gpt_test gpt GTest::gtest_main)

add_executable(gpt_optim llmcpp/gpt_optim.cpp)
target_link_libraries(gpt_optim gpt)

add_executable(test_eigen_cpu llmcpp/test_eigen_cpu.cpp)
target_link_libraries(test_eigen_cpu absl::strings absl::log absl::check GTest::gtest_main)
target_compile_options(test_eigen_cpu PRIVATE -Ofast -march=native)
add_subdirectory(llmc)
add_subdirectory(llmcpp)
25 changes: 25 additions & 0 deletions llmc/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@

# Build rules for the llm.c reference programs. Their sources are referenced
# relative to the parent directory of this CMakeLists.txt.

# OpenMP is optional. The programs below are C, so the C component is what
# matters; when it is missing they are simply built without OpenMP.
find_package(OpenMP)

# llm.c (CPU)
foreach (llmc_program train_gpt2 test_gpt2)
  add_executable(${llmc_program} ../${llmc_program}.c)
  target_link_libraries(${llmc_program} PRIVATE m)
  if (OpenMP_C_FOUND)
    # The OMP definition is attached to these targets only; the imported target
    # supplies the matching compile and link flags for the active C compiler
    # instead of a hard-coded -fopenmp.
    target_compile_definitions(${llmc_program} PRIVATE OMP)
    target_link_libraries(${llmc_program} PRIVATE OpenMP::OpenMP_C)
  endif ()
endforeach ()

# llm.c (CUDA)
# The result variables of find_package() are scoped to the directory that calls
# it, so CUDA has to be looked up here; a lookup done in a sibling directory is
# not visible in this file.
find_package(CUDA)
if (CUDA_FOUND)
  # Referenced through ../ like the C sources above.
  add_executable(train_gpt2cu ../train_gpt2.cu)
  target_compile_definitions(train_gpt2cu PRIVATE ENABLE_FP32)
  set_target_properties(train_gpt2cu PROPERTIES
    CUDA_SEPARABLE_COMPILATION ON
    CUDA_ARCHITECTURES "61;70;75"
  )
  target_link_libraries(train_gpt2cu PRIVATE ${CUDA_LIBRARIES} cublas cublasLt)
endif ()
90 changes: 90 additions & 0 deletions llmcpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# llm.cpp
# nn, gpt, gpt2 and optim are made of headers only, so they are declared as
# INTERFACE libraries: nothing is compiled or archived for them, they only pass
# their link dependencies on to whatever links against them. Declaring them as
# ordinary libraries with a header as the sole source leaves CMake without a
# linker language for the target.
find_package(Threads REQUIRED)

add_library(nn INTERFACE)
target_link_libraries(nn INTERFACE
  absl::strings
  absl::log
  absl::check
  Threads::Threads  # portable replacement for linking "pthread" by name
)

add_library(gpt INTERFACE)
target_link_libraries(gpt INTERFACE nn)

add_library(gpt2 INTERFACE)
target_link_libraries(gpt2 INTERFACE gpt)

add_library(optim INTERFACE)
target_link_libraries(optim INTERFACE nn)

# CPU executables.
#
# -march=native ties the binaries to the CPU of the build machine and -Ofast
# relaxes floating-point conformance. Both stay enabled by default (the previous
# behaviour) but can be switched off with -DLLMCPP_NATIVE_OPTIMIZATIONS=OFF.
option(LLMCPP_NATIVE_OPTIMIZATIONS "Build llm.cpp CPU binaries with -Ofast -march=native" ON)
set(llmcpp_fast_options "")
if (LLMCPP_NATIVE_OPTIMIZATIONS)
  list(APPEND llmcpp_fast_options -Ofast -march=native)
endif ()

add_executable(test_gpt2_cpu test_gpt2.cpp)
target_link_libraries(test_gpt2_cpu PRIVATE gpt2 optim)
target_compile_options(test_gpt2_cpu PRIVATE ${llmcpp_fast_options})

add_executable(train_gpt2_cpu train_gpt2.cpp)
target_link_libraries(train_gpt2_cpu PRIVATE
  gpt2
  optim
  profiler  # NOTE(review): linked by bare name; presumably gperftools' libprofiler - confirm
)
target_compile_options(train_gpt2_cpu PRIVATE ${llmcpp_fast_options})

# GoogleTest-based unit tests.
add_executable(nn_test nn_test.cpp)
target_link_libraries(nn_test PRIVATE nn GTest::gtest_main)

add_executable(optim_test optim_test.cpp)
target_link_libraries(optim_test PRIVATE nn GTest::gtest_main)

add_executable(gpt_test gpt_test.cpp)
target_link_libraries(gpt_test PRIVATE gpt GTest::gtest_main)

add_executable(gpt_optim gpt_optim.cpp)
target_link_libraries(gpt_optim PRIVATE gpt)

add_executable(test_eigen_cpu test_eigen_cpu.cpp)
target_link_libraries(test_eigen_cpu PRIVATE nn)
target_compile_options(test_eigen_cpu PRIVATE ${llmcpp_fast_options})

# CUDA targets; they are only defined when find_package(CUDA) succeeds.
set(CMAKE_CUDA_ARCHITECTURES 60 61 70 75)
find_package(CUDA)
if (CUDA_FOUND)
  # nn_gpu and gpt_gpu are header-only, so they are INTERFACE libraries: they
  # forward EIGEN_USE_GPU and their link dependencies to consumers and produce
  # no archive of their own.
  add_library(nn_gpu INTERFACE)
  target_compile_definitions(nn_gpu INTERFACE EIGEN_USE_GPU)
  target_link_libraries(nn_gpu INTERFACE
    absl::strings
    absl::log
    absl::check
    ${CUDA_LIBRARIES}
  )

  add_library(gpt_gpu INTERFACE)
  target_link_libraries(gpt_gpu INTERFACE nn_gpu)

  add_executable(test_eigen_gpu test_eigen_gpu.cu)
  target_compile_definitions(test_eigen_gpu PRIVATE EIGEN_USE_GPU)
  target_link_libraries(test_eigen_gpu PRIVATE nn_gpu)

  # nn_test_gpu
  add_executable(nn_test_gpu nn_test.cu)
  target_link_libraries(nn_test_gpu PRIVATE nn_gpu GTest::gtest_main)

  # gpt_test_gpu
  add_executable(gpt_test_gpu gpt_test.cu)
  target_link_libraries(gpt_test_gpu PRIVATE gpt_gpu GTest::gtest_main)

  # gpt_optim_gpu
  add_executable(gpt_optim_gpu gpt_optim.cu)
  target_link_libraries(gpt_optim_gpu PRIVATE gpt_gpu)

  # train_gpt2_gpu
  add_executable(train_gpt2_gpu train_gpt2.cu)
  target_link_libraries(train_gpt2_gpu PRIVATE gpt_gpu)
  target_compile_options(train_gpt2_gpu PRIVATE -O3)
endif ()
6 changes: 3 additions & 3 deletions llmcpp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ mkdir build && cd build
cmake ..
make train_gpt2_cpu
cd ../
./build/train_gpt2_cpu
./build/llmcpp/train_gpt2_cpu
```

The above lines
Expand Down Expand Up @@ -68,7 +68,7 @@ mkdir build && cd build
cmake ..
make train_gpt2_gpu
cd ../
./build/train_gpt2_gpu
./build/llmcpp/train_gpt2_gpu
```


Expand Down Expand Up @@ -98,7 +98,7 @@ mkdir build && cd build
cmake ..
make test_gpt2_cpu
cd ../
./build/test_gpt2_cpu
./build/llmcpp/test_gpt2_cpu
```

This now loads the `gpt2_124M_debug_state.bin` file that gets written by train_gpt2.py, runs a forward pass, compares the logits and loss with the PyTorch reference implementation, then it does 10 iterations of training with Adam and makes sure the losses match PyTorch.
Expand Down
1 change: 0 additions & 1 deletion llmcpp/gpt.cpp

This file was deleted.

1 change: 0 additions & 1 deletion llmcpp/gpt.cu

This file was deleted.

1 change: 0 additions & 1 deletion llmcpp/gpt2.cpp

This file was deleted.

1 change: 0 additions & 1 deletion llmcpp/nn.cpp

This file was deleted.

1 change: 0 additions & 1 deletion llmcpp/nn.cu

This file was deleted.

1 change: 0 additions & 1 deletion llmcpp/optim.cpp

This file was deleted.

7 changes: 0 additions & 7 deletions llmcpp/pch.hpp

This file was deleted.

4 changes: 2 additions & 2 deletions llmcpp/tensor_types.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#ifndef LLM_CPP_LLMCPP_TENSOR_TYPES_HPP_
#define LLM_CPP_LLMCPP_TENSOR_TYPES_HPP_

#include "pch.hpp"
#include "Eigen/Dense"
#include "unsupported/Eigen/CXX11/Tensor"

// Helper to define Tensor types given that the scalar is of type T.
template <typename T, int NDIMS = 1, typename IndexType = Eigen::DenseIndex>
Expand Down Expand Up @@ -85,5 +86,4 @@ struct TTypes {
UnalignedConstMatrix;
};


#endif // LLM_CPP_LLMCPP_TENSOR_TYPES_HPP_
5 changes: 2 additions & 3 deletions llmcpp/test_eigen_cpu.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include "gpt.hpp"
#include "optim.hpp"
#include "nn.hpp"

using Tensor1D = Eigen::Tensor<float, 1, Eigen::RowMajor>;
using Tensor2D = Eigen::Tensor<float, 2, Eigen::RowMajor>;
Expand Down Expand Up @@ -35,7 +34,7 @@ int main(int argc, char** argv) {

auto start = std::chrono::steady_clock::now();
for (int i = 0; i < 10; ++i) {
nn::MatMul<float>::Forward(xm, lm_headm, ym);
nn::MatMul::Forward(xm, lm_headm, ym);
}
auto end = std::chrono::steady_clock::now();
std::cout << "avg: "
Expand Down
1 change: 0 additions & 1 deletion llmcpp/train_gpt2.cu
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include "llmc/dataloader.h"
#include "llmc/tokenizer.h"
#include "optim.hpp"
#include "pch.hpp"

// sampler

Expand Down

0 comments on commit 3056e48

Please sign in to comment.