From 8a5ffdbb8b3da5aa997929019f45699de1f785bc Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Fri, 13 Jul 2018 21:11:59 +0900
Subject: [PATCH 01/25] Support OpenCL offline compilation

---
 cmake/config.cmake                  |  7 ++++
 cmake/modules/OpenCL.cmake          |  9 +++++
 src/runtime/opencl/opencl_common.h  |  2 ++
 src/runtime/opencl/opencl_module.cc | 55 +++++++++++++++++++++++++++++
 4 files changed, 73 insertions(+)
diff --git a/cmake/config.cmake b/cmake/config.cmake
index 89f5a0597e43..0eff4c325d3e 100644
--- a/cmake/config.cmake
+++ b/cmake/config.cmake
@@ -42,6 +42,13 @@ set(USE_ROCM OFF)
 # Whether enable SDAccel runtime
 set(USE_SDACCEL OFF)
 
+# Whether enable Intel FPGA SDK for OpenCL (AOCL) during compile,
+#
+# Possible values:
+# - OFF: disbale AOCL
+# - board_name: use specific board name for offline compilation
+set(USE_AOCL OFF)
+
 # Whether enable OpenCL runtime
 set(USE_OPENCL OFF)
 
diff --git a/cmake/modules/OpenCL.cmake b/cmake/modules/OpenCL.cmake
index 8ae4b29aff48..7ec4f5a8c7df 100644
--- a/cmake/modules/OpenCL.cmake
+++ b/cmake/modules/OpenCL.cmake
@@ -19,6 +19,15 @@ else()
   list(APPEND COMPILER_SRCS src/codegen/opt/build_sdaccel_off.cc)
 endif(USE_SDACCEL)
 
+if(USE_AOCL)
+  message(STATUS "Build with Intel FPGA SDK for OpenCL support")
+  add_definitions(-DAOCL_BOARD_NAME=${USE_AOCL})
+  if(NOT USE_OPENCL)
+    message(STATUS "Enable OpenCL support required for Intel FPGA SDK for OpenCL")
+    set(USE_OPENCL ON)
+  endif()
+endif(USE_AOCL)
+
 if(USE_OPENCL)
   find_package(OpenCL REQUIRED)
   message(STATUS "Build with OpenCL support")
diff --git a/src/runtime/opencl/opencl_common.h b/src/runtime/opencl/opencl_common.h
index 46b212bee464..dca967407d52 100644
--- a/src/runtime/opencl/opencl_common.h
+++ b/src/runtime/opencl/opencl_common.h
@@ -281,6 +281,8 @@ class OpenCLModuleNode : public ModuleNode {
   std::unordered_map<std::string, KTRefEntry> kid_map_;
   // kernels build so far.
   std::vector<cl_kernel> kernels_;
+
+  void OfflineCompile(cl::OpenCLWorkspace* w,  cl::OpenCLThreadEntry* t);
 };
 
 }  // namespace runtime
diff --git a/src/runtime/opencl/opencl_module.cc b/src/runtime/opencl/opencl_module.cc
index 68907d11cd75..e2f1b1f240a4 100644
--- a/src/runtime/opencl/opencl_module.cc
+++ b/src/runtime/opencl/opencl_module.cc
@@ -6,10 +6,15 @@
 #include <tvm/runtime/registry.h>
 #include <vector>
 #include <string>
+#include <iostream>
+#include <fstream>
 #include <unordered_map>
 #include "./opencl_common.h"
 #include "./opencl_module.h"
 
+#define STRINGIFY(s) #s
+#define TOSTRING(id) STRINGIFY(id)
+
 namespace tvm {
 namespace runtime {
 
@@ -189,11 +194,15 @@ cl_kernel OpenCLModuleNode::InstallKernel(cl::OpenCLWorkspace* w,
     // create program
     if (fmt_ == "cl") {
       if (program_ == nullptr) {
+#ifndef AOCL_BOARD_NAME
         const char* s = data_.c_str();
         size_t len = data_.length();
         cl_int err;
         program_ = clCreateProgramWithSource(w->context, 1, &s, &len, &err);
         OPENCL_CHECK_ERROR(err);
+#else
+	OfflineCompile(w, t);
+#endif
       }
     } else if (fmt_ == "xclbin" || fmt_ == "awsxclbin") {
       const unsigned char* s = (const unsigned char *)data_.c_str();
@@ -231,6 +240,52 @@ cl_kernel OpenCLModuleNode::InstallKernel(cl::OpenCLWorkspace* w,
   return kernel;
 }
 
+void OpenCLModuleNode::OfflineCompile(cl::OpenCLWorkspace* w,
+				      cl::OpenCLThreadEntry* t) {
+    // Write a .cl file.
+    std::ofstream ofs("aocltmp.cl");
+    if (!ofs) {
+      LOG(FATAL) << "Can't create OpenCL temporary file.";
+    }
+    ofs << data_.c_str();
+    if (!ofs) {
+      LOG(FATAL) << "Can't write to OpenCL temporary file.";
+    }
+    ofs.close();
+
+    // Compile the .cl file.
+    std::string cmd = "aoc aocltmp.cl -march=emulator -board=";
+    cmd += TOSTRING(AOCL_BOARD_NAME_STR);
+    if (system(cmd.c_str()) != 0) {
+      LOG(FATAL) << "OpenCL offline compilation error.";
+    }
+
+    // Read .aocx file
+    std::ifstream ifs("aocltmp.aocx", std::ios::in | std::ios::binary);
+    if (!ifs) {
+      LOG(FATAL) << "Can't open aocltmp.aocx file.";
+    }
+    ifs.seekg(0, std::fstream::end);
+    const size_t len = ifs.tellg();
+    char *buf = new char[len];
+    ifs.clear();
+    ifs.seekg(0, std::fstream::beg);
+    ifs.read(buf, len);
+    if (!ifs) {
+      LOG(FATAL) << "Can't read aocltmp.aocx file.";
+    }
+
+    // Create program from aocx.
+    cl_int err;
+    int device_id = t->context.device_id;
+    cl_device_id dev = w->devices[device_id];
+    const unsigned char* s = (const unsigned char *)buf;
+    program_ = clCreateProgramWithBinary(w->context, 1, &dev, &len, &s, NULL, &err);
+    OPENCL_CHECK_ERROR(err);
+
+    delete[] buf;
+}
+
 Module OpenCLModuleCreate(
     std::string data,
     std::string fmt,

From 120979c54cf7110c36878f62b9c23a2278a91404 Mon Sep 17 00:00:00 2001
From: ktabata <tabata.keiichi@lab.ntt.co.jp>
Date: Mon, 23 Jul 2018 15:31:35 +0900
Subject: [PATCH 02/25] AOCL emulation runs.

---
 cmake/modules/OpenCL.cmake              |  2 +-
 dmlc-core                               |  2 +-
 docs/deploy/aocl_fpga.md                | 56 +++++++++++++++++++++++++
 src/runtime/opencl/opencl_common.h      |  4 ++
 src/runtime/opencl/opencl_device_api.cc |  4 ++
 src/runtime/opencl/opencl_module.cc     |  5 +--
 6 files changed, 67 insertions(+), 6 deletions(-)
 create mode 100644 docs/deploy/aocl_fpga.md

diff --git a/cmake/modules/OpenCL.cmake b/cmake/modules/OpenCL.cmake
index 7ec4f5a8c7df..22dc2223091c 100644
--- a/cmake/modules/OpenCL.cmake
+++ b/cmake/modules/OpenCL.cmake
@@ -21,7 +21,7 @@ endif(USE_SDACCEL)
 
 if(USE_AOCL)
   message(STATUS "Build with Intel FPGA SDK for OpenCL support")
-  add_definitions(-DAOCL_BOARD_NAME=${USE_AOCL})
+  add_definitions(-DAOCL_BOARD_NAME="${USE_AOCL}")
   if(NOT USE_OPENCL)
     message(STATUS "Enable OpenCL support required for Intel FPGA SDK for OpenCL")
     set(USE_OPENCL ON)
diff --git a/dmlc-core b/dmlc-core
index e864aa6757cd..5abeceeaea21 160000
--- a/dmlc-core
+++ b/dmlc-core
@@ -1 +1 @@
-Subproject commit e864aa6757cdbe78b1296fe5231fd3050b7802c3
+Subproject commit 5abeceeaea21f9461792b130720fc215a86040f5
diff --git a/docs/deploy/aocl_fpga.md b/docs/deploy/aocl_fpga.md
new file mode 100644
index 000000000000..6ed626645d94
--- /dev/null
+++ b/docs/deploy/aocl_fpga.md
@@ -0,0 +1,56 @@
+AOCL Backend Example
+====================
+
+TVM supports Intel FPGA SDK for OpenCL also known as AOCL.  Here is a tutorial for how to use TVM with AOCL.
+
+***Note***: This feature is still experimental.  We cannot use AOCL to deploy an end to end neural networks for now.  In addition, we can only use AOCL's emulation mode for now.
+
+We use a python scripts for this tutorial.
+
+- emu-aocl-fpga.py
+```# -*- coding: utf-8 -*-
+import tvm
+import numpy as np
+
+tgt_host = 'llvm'
+tgt = 'opencl'
+
+# Define a computation.
+n = tvm.var('n')
+a = tvm.placeholder((n,), name='a')
+b = tvm.placeholder((n,), name='b')
+c = tvm.compute(a.shape, lambda i: a[i] + b[i], name='c')
+
+# Make a schedule.
+s = tvm.create_schedule(c.op)
+px, x = s[c].split(c.op.axis[0], nparts=1)
+s[c].bind(px, tvm.thread_axis("pipeline"))
+
+# Make a executable code.
+fadd = tvm.build(s, [a, b, c], tgt, target_host=tgt_host, name='myadd')
+
+# Run.
+ctx = tvm.context(tgt, 0)
+n = 1024
+a = tvm.nd.array(np.random.uniform(size=n).astype(a.dtype), ctx)
+b = tvm.nd.array(np.random.uniform(size=n).astype(b.dtype), ctx)
+c = tvm.nd.array(np.zeros(n, dtype=c.dtype), ctx)
+fadd(a, b, c)```
+
+Setup
+-----
+
+- Install AOCL 17.1 on Ubuntu 16.04.4 LTS.
+- Install FPGA device driver.
+- Make ICD file.
+- Make FCD file.
+- Setup TVM with AOCL and OpenCL enabled.
+
+Emulation
+---------
+
+- Set environment variable.
+```export CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA=1```
+
+- Run software emulation
+```python emu-aocl-fpga.py```
diff --git a/src/runtime/opencl/opencl_common.h b/src/runtime/opencl/opencl_common.h
index 4b26264491a2..ebbc3ed140e6 100644
--- a/src/runtime/opencl/opencl_common.h
+++ b/src/runtime/opencl/opencl_common.h
@@ -143,7 +143,11 @@ class OpenCLWorkspace : public DeviceAPI {
   void Init(const std::string& type_key, const std::string& device_type,
             const std::string& platform_name = "");
   virtual void Init() {
+#ifndef AOCL_BOARD_NAME
     Init("opencl", "gpu");
+#else
+    Init("opencl", "accelerator");
+#endif
   }
   // Check whether the context is OpenCL or not.
   virtual bool IsOpenCLDevice(TVMContext ctx) {
diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc
index ac9373f1375b..5a724abefbf0 100644
--- a/src/runtime/opencl/opencl_device_api.cc
+++ b/src/runtime/opencl/opencl_device_api.cc
@@ -208,7 +208,11 @@ std::vector<cl_device_id> GetDeviceIDs(
   cl_device_type dtype = CL_DEVICE_TYPE_ALL;
   if (device_type == "cpu") dtype = CL_DEVICE_TYPE_CPU;
   if (device_type == "gpu") dtype = CL_DEVICE_TYPE_GPU;
+#ifndef AOCL_BOARD_NAME
   if (device_type == "accelerator") dtype = CL_DEVICE_TYPE_ACCELERATOR;
+#else
+  if (device_type == "accelerator") dtype = CL_DEVICE_TYPE_DEFAULT;
+#endif
   cl_uint ret_size;
   cl_int code = clGetDeviceIDs(pid, dtype, 0, nullptr, &ret_size);
   std::vector<cl_device_id> ret;
diff --git a/src/runtime/opencl/opencl_module.cc b/src/runtime/opencl/opencl_module.cc
index 731877b90af7..acc2cf561b87 100644
--- a/src/runtime/opencl/opencl_module.cc
+++ b/src/runtime/opencl/opencl_module.cc
@@ -12,9 +12,6 @@
 #include "./opencl_common.h"
 #include "./opencl_module.h"
 
-#define STRINGIFY(s) #s
-#define TOSTRING(id) STRINGIFY(id)
-
 namespace tvm {
 namespace runtime {
 
@@ -251,7 +248,7 @@ void OpenCLModuleNode::OfflineCompile(cl::OpenCLWorkspace* w,
 
     // Compile the .cl file.
     std::string cmd = "aoc aocltmp.cl -march=emulator -board=";
-    cmd += TOSTRING(AOCL_BOARD_NAME_STR);
+    cmd += AOCL_BOARD_NAME;
     if (system(cmd.c_str()) != 0) {
       LOG(FATAL) << "OpenCL offline compilation error.";
     }

From ca137cbe6a73a3877bab447f6b3dd7187dee19c3 Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Fri, 13 Jul 2018 21:11:59 +0900
Subject: [PATCH 03/25] Support OpenCL offline compilation

---
 cmake/config.cmake                  |  7 ++++
 cmake/modules/OpenCL.cmake          |  9 +++++
 src/runtime/opencl/opencl_common.h  |  2 ++
 src/runtime/opencl/opencl_module.cc | 55 +++++++++++++++++++++++++++++
 4 files changed, 73 insertions(+)

diff --git a/cmake/config.cmake b/cmake/config.cmake
index 89f5a0597e43..0eff4c325d3e 100644
--- a/cmake/config.cmake
+++ b/cmake/config.cmake
@@ -42,6 +42,13 @@ set(USE_ROCM OFF)
 # Whether enable SDAccel runtime
 set(USE_SDACCEL OFF)
 
+# Whether enable Intel FPGA SDK for OpenCL (AOCL) during compile,
+#
+# Possible values:
+# - OFF: disbale AOCL
+# - board_name: use specific board name for offline compilation
+set(USE_AOCL OFF)
+
 # Whether enable OpenCL runtime
 set(USE_OPENCL OFF)
 
diff --git a/cmake/modules/OpenCL.cmake b/cmake/modules/OpenCL.cmake
index 8ae4b29aff48..7ec4f5a8c7df 100644
--- a/cmake/modules/OpenCL.cmake
+++ b/cmake/modules/OpenCL.cmake
@@ -19,6 +19,15 @@ else()
   list(APPEND COMPILER_SRCS src/codegen/opt/build_sdaccel_off.cc)
 endif(USE_SDACCEL)
 
+if(USE_AOCL)
+  message(STATUS "Build with Intel FPGA SDK for OpenCL support")
+  add_definitions(-DAOCL_BOARD_NAME=${USE_AOCL})
+  if(NOT USE_OPENCL)
+    message(STATUS "Enable OpenCL support required for Intel FPGA SDK for OpenCL")
+    set(USE_OPENCL ON)
+  endif()
+endif(USE_AOCL)
+
 if(USE_OPENCL)
   find_package(OpenCL REQUIRED)
   message(STATUS "Build with OpenCL support")
diff --git a/src/runtime/opencl/opencl_common.h b/src/runtime/opencl/opencl_common.h
index c37dbaa94d7a..4b26264491a2 100644
--- a/src/runtime/opencl/opencl_common.h
+++ b/src/runtime/opencl/opencl_common.h
@@ -284,6 +284,8 @@ class OpenCLModuleNode : public ModuleNode {
   std::unordered_map<std::string, KTRefEntry> kid_map_;
   // kernels build so far.
   std::vector<cl_kernel> kernels_;
+
+  void OfflineCompile(cl::OpenCLWorkspace* w,  cl::OpenCLThreadEntry* t);
 };
 
 }  // namespace runtime
diff --git a/src/runtime/opencl/opencl_module.cc b/src/runtime/opencl/opencl_module.cc
index aa51f17d4021..731877b90af7 100644
--- a/src/runtime/opencl/opencl_module.cc
+++ b/src/runtime/opencl/opencl_module.cc
@@ -6,10 +6,15 @@
 #include <tvm/runtime/registry.h>
 #include <vector>
 #include <string>
+#include <iostream>
+#include <fstream>
 #include <unordered_map>
 #include "./opencl_common.h"
 #include "./opencl_module.h"
 
+#define STRINGIFY(s) #s
+#define TOSTRING(id) STRINGIFY(id)
+
 namespace tvm {
 namespace runtime {
 
@@ -185,11 +190,15 @@ cl_kernel OpenCLModuleNode::InstallKernel(cl::OpenCLWorkspace* w,
     // create program
     if (fmt_ == "cl") {
       if (program_ == nullptr) {
+#ifndef AOCL_BOARD_NAME
         const char* s = data_.c_str();
         size_t len = data_.length();
         cl_int err;
         program_ = clCreateProgramWithSource(w->context, 1, &s, &len, &err);
         OPENCL_CHECK_ERROR(err);
+#else
+	OfflineCompile(w, t);
+#endif
       }
     } else if (fmt_ == "xclbin" || fmt_ == "awsxclbin") {
       const unsigned char* s = (const unsigned char *)data_.c_str();
@@ -227,6 +236,52 @@ cl_kernel OpenCLModuleNode::InstallKernel(cl::OpenCLWorkspace* w,
   return kernel;
 }
 
+void OpenCLModuleNode::OfflineCompile(cl::OpenCLWorkspace* w,
+				      cl::OpenCLThreadEntry* t) {
+    // Write a .cl file.
+    std::ofstream ofs("aocltmp.cl");
+    if (!ofs) {
+      LOG(FATAL) << "Can't create OpenCL temporary file.";
+    }
+    ofs << data_.c_str();
+    if (!ofs) {
+      LOG(FATAL) << "Can't write to OpenCL temporary file.";
+    }
+    ofs.close();
+
+    // Compile the .cl file.
+    std::string cmd = "aoc aocltmp.cl -march=emulator -board=";
+    cmd += TOSTRING(AOCL_BOARD_NAME_STR);
+    if (system(cmd.c_str()) != 0) {
+      LOG(FATAL) << "OpenCL offline compilation error.";
+    }
+
+    // Read .aocx file
+    std::ifstream ifs("aocltmp.aocx", std::ios::in | std::ios::binary);
+    if (!ifs) {
+      LOG(FATAL) << "Can't open aocltmp.aocx file.";
+    }
+    ifs.seekg(0, std::fstream::end);
+    const size_t len = ifs.tellg();
+    char *buf = new char[len];
+    ifs.clear();
+    ifs.seekg(0, std::fstream::beg);
+    ifs.read(buf, len);
+    if (!ifs) {
+      LOG(FATAL) << "Can't read aocltmp.aocx file.";
+    }
+
+    // Create program from aocx.
+    cl_int err;
+    int device_id = t->context.device_id;
+    cl_device_id dev = w->devices[device_id];
+    const unsigned char* s = (const unsigned char *)buf;
+    program_ = clCreateProgramWithBinary(w->context, 1, &dev, &len, &s, NULL, &err);
+    OPENCL_CHECK_ERROR(err);
+
+    delete[] buf;
+}
+
 Module OpenCLModuleCreate(
     std::string data,
     std::string fmt,

From f2257d0e534654fab341f304b5d2cbc33593f010 Mon Sep 17 00:00:00 2001
From: ktabata <tabata.keiichi@lab.ntt.co.jp>
Date: Mon, 23 Jul 2018 15:31:35 +0900
Subject: [PATCH 04/25] AOCL emulation runs.

---
 cmake/modules/OpenCL.cmake              |  2 +-
 dmlc-core                               |  2 +-
 docs/deploy/aocl_fpga.md                | 56 +++++++++++++++++++++++++
 src/runtime/opencl/opencl_common.h      |  4 ++
 src/runtime/opencl/opencl_device_api.cc |  4 ++
 src/runtime/opencl/opencl_module.cc     |  5 +--
 6 files changed, 67 insertions(+), 6 deletions(-)
 create mode 100644 docs/deploy/aocl_fpga.md

diff --git a/cmake/modules/OpenCL.cmake b/cmake/modules/OpenCL.cmake
index 7ec4f5a8c7df..22dc2223091c 100644
--- a/cmake/modules/OpenCL.cmake
+++ b/cmake/modules/OpenCL.cmake
@@ -21,7 +21,7 @@ endif(USE_SDACCEL)
 
 if(USE_AOCL)
   message(STATUS "Build with Intel FPGA SDK for OpenCL support")
-  add_definitions(-DAOCL_BOARD_NAME=${USE_AOCL})
+  add_definitions(-DAOCL_BOARD_NAME="${USE_AOCL}")
   if(NOT USE_OPENCL)
     message(STATUS "Enable OpenCL support required for Intel FPGA SDK for OpenCL")
     set(USE_OPENCL ON)
diff --git a/dmlc-core b/dmlc-core
index e864aa6757cd..5abeceeaea21 160000
--- a/dmlc-core
+++ b/dmlc-core
@@ -1 +1 @@
-Subproject commit e864aa6757cdbe78b1296fe5231fd3050b7802c3
+Subproject commit 5abeceeaea21f9461792b130720fc215a86040f5
diff --git a/docs/deploy/aocl_fpga.md b/docs/deploy/aocl_fpga.md
new file mode 100644
index 000000000000..6ed626645d94
--- /dev/null
+++ b/docs/deploy/aocl_fpga.md
@@ -0,0 +1,56 @@
+AOCL Backend Example
+====================
+
+TVM supports Intel FPGA SDK for OpenCL also known as AOCL.  Here is a tutorial for how to use TVM with AOCL.
+
+***Note***: This feature is still experimental.  We cannot use AOCL to deploy an end to end neural networks for now.  In addition, we can only use AOCL's emulation mode for now.
+
+We use a python scripts for this tutorial.
+
+- emu-aocl-fpga.py
+```# -*- coding: utf-8 -*-
+import tvm
+import numpy as np
+
+tgt_host = 'llvm'
+tgt = 'opencl'
+
+# Define a computation.
+n = tvm.var('n')
+a = tvm.placeholder((n,), name='a')
+b = tvm.placeholder((n,), name='b')
+c = tvm.compute(a.shape, lambda i: a[i] + b[i], name='c')
+
+# Make a schedule.
+s = tvm.create_schedule(c.op)
+px, x = s[c].split(c.op.axis[0], nparts=1)
+s[c].bind(px, tvm.thread_axis("pipeline"))
+
+# Make a executable code.
+fadd = tvm.build(s, [a, b, c], tgt, target_host=tgt_host, name='myadd')
+
+# Run.
+ctx = tvm.context(tgt, 0)
+n = 1024
+a = tvm.nd.array(np.random.uniform(size=n).astype(a.dtype), ctx)
+b = tvm.nd.array(np.random.uniform(size=n).astype(b.dtype), ctx)
+c = tvm.nd.array(np.zeros(n, dtype=c.dtype), ctx)
+fadd(a, b, c)```
+
+Setup
+-----
+
+- Install AOCL 17.1 on Ubuntu 16.04.4 LTS.
+- Install FPGA device driver.
+- Make ICD file.
+- Make FCD file.
+- Setup TVM with AOCL and OpenCL enabled.
+
+Emulation
+---------
+
+- Set environment variable.
+```export CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA=1```
+
+- Run software emulation
+```python emu-aocl-fpga.py```
diff --git a/src/runtime/opencl/opencl_common.h b/src/runtime/opencl/opencl_common.h
index 4b26264491a2..ebbc3ed140e6 100644
--- a/src/runtime/opencl/opencl_common.h
+++ b/src/runtime/opencl/opencl_common.h
@@ -143,7 +143,11 @@ class OpenCLWorkspace : public DeviceAPI {
   void Init(const std::string& type_key, const std::string& device_type,
             const std::string& platform_name = "");
   virtual void Init() {
+#ifndef AOCL_BOARD_NAME
     Init("opencl", "gpu");
+#else
+    Init("opencl", "accelerator");
+#endif
   }
   // Check whether the context is OpenCL or not.
   virtual bool IsOpenCLDevice(TVMContext ctx) {
diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc
index ac9373f1375b..5a724abefbf0 100644
--- a/src/runtime/opencl/opencl_device_api.cc
+++ b/src/runtime/opencl/opencl_device_api.cc
@@ -208,7 +208,11 @@ std::vector<cl_device_id> GetDeviceIDs(
   cl_device_type dtype = CL_DEVICE_TYPE_ALL;
   if (device_type == "cpu") dtype = CL_DEVICE_TYPE_CPU;
   if (device_type == "gpu") dtype = CL_DEVICE_TYPE_GPU;
+#ifndef AOCL_BOARD_NAME
   if (device_type == "accelerator") dtype = CL_DEVICE_TYPE_ACCELERATOR;
+#else
+  if (device_type == "accelerator") dtype = CL_DEVICE_TYPE_DEFAULT;
+#endif
   cl_uint ret_size;
   cl_int code = clGetDeviceIDs(pid, dtype, 0, nullptr, &ret_size);
   std::vector<cl_device_id> ret;
diff --git a/src/runtime/opencl/opencl_module.cc b/src/runtime/opencl/opencl_module.cc
index 731877b90af7..acc2cf561b87 100644
--- a/src/runtime/opencl/opencl_module.cc
+++ b/src/runtime/opencl/opencl_module.cc
@@ -12,9 +12,6 @@
 #include "./opencl_common.h"
 #include "./opencl_module.h"
 
-#define STRINGIFY(s) #s
-#define TOSTRING(id) STRINGIFY(id)
-
 namespace tvm {
 namespace runtime {
 
@@ -251,7 +248,7 @@ void OpenCLModuleNode::OfflineCompile(cl::OpenCLWorkspace* w,
 
     // Compile the .cl file.
     std::string cmd = "aoc aocltmp.cl -march=emulator -board=";
-    cmd += TOSTRING(AOCL_BOARD_NAME_STR);
+    cmd += AOCL_BOARD_NAME;
     if (system(cmd.c_str()) != 0) {
       LOG(FATAL) << "OpenCL offline compilation error.";
     }

From edd043710bf8083238797f28efb137ef6a337197 Mon Sep 17 00:00:00 2001
From: ktabata <tabata.keiichi@lab.ntt.co.jp>
Date: Mon, 23 Jul 2018 16:08:45 +0900
Subject: [PATCH 05/25] tab to white spaces.

---
 src/runtime/opencl/opencl_module.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/runtime/opencl/opencl_module.cc b/src/runtime/opencl/opencl_module.cc
index acc2cf561b87..6759d39af50e 100644
--- a/src/runtime/opencl/opencl_module.cc
+++ b/src/runtime/opencl/opencl_module.cc
@@ -194,7 +194,7 @@ cl_kernel OpenCLModuleNode::InstallKernel(cl::OpenCLWorkspace* w,
         program_ = clCreateProgramWithSource(w->context, 1, &s, &len, &err);
         OPENCL_CHECK_ERROR(err);
 #else
-	OfflineCompile(w, t);
+        OfflineCompile(w, t);
 #endif
       }
     } else if (fmt_ == "xclbin" || fmt_ == "awsxclbin") {
@@ -234,7 +234,7 @@ cl_kernel OpenCLModuleNode::InstallKernel(cl::OpenCLWorkspace* w,
 }
 
 void OpenCLModuleNode::OfflineCompile(cl::OpenCLWorkspace* w,
-				      cl::OpenCLThreadEntry* t) {
+                                      cl::OpenCLThreadEntry* t) {
     // Write a .cl file.
     std::ofstream ofs("aocltmp.cl");
     if (!ofs) {

From aca96a50f544fe65ef9fd01df36aa1bea335c201 Mon Sep 17 00:00:00 2001
From: ktabata <tabata.keiichi@lab.ntt.co.jp>
Date: Mon, 23 Jul 2018 16:42:15 +0900
Subject: [PATCH 06/25] Fixed macro error.

---
 src/runtime/opencl/opencl_common.h  | 2 ++
 src/runtime/opencl/opencl_module.cc | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/src/runtime/opencl/opencl_common.h b/src/runtime/opencl/opencl_common.h
index ebbc3ed140e6..12dd0db1336b 100644
--- a/src/runtime/opencl/opencl_common.h
+++ b/src/runtime/opencl/opencl_common.h
@@ -289,7 +289,9 @@ class OpenCLModuleNode : public ModuleNode {
   // kernels build so far.
   std::vector<cl_kernel> kernels_;
 
+#ifdef AOCL_BOARD_NAME
   void OfflineCompile(cl::OpenCLWorkspace* w,  cl::OpenCLThreadEntry* t);
+#endif
 };
 
 }  // namespace runtime
diff --git a/src/runtime/opencl/opencl_module.cc b/src/runtime/opencl/opencl_module.cc
index 6759d39af50e..b1a0f77aa62c 100644
--- a/src/runtime/opencl/opencl_module.cc
+++ b/src/runtime/opencl/opencl_module.cc
@@ -233,6 +233,7 @@ cl_kernel OpenCLModuleNode::InstallKernel(cl::OpenCLWorkspace* w,
   return kernel;
 }
 
+#ifdef AOCL_BOARD_NAME
 void OpenCLModuleNode::OfflineCompile(cl::OpenCLWorkspace* w,
                                       cl::OpenCLThreadEntry* t) {
     // Write a .cl file.
@@ -278,6 +279,7 @@ void OpenCLModuleNode::OfflineCompile(cl::OpenCLWorkspace* w,
 
     delete[] buf;
 }
+#endif
 
 Module OpenCLModuleCreate(
     std::string data,

From 5016f73da1ac34e230336bbafbf6d08ff5db9ece Mon Sep 17 00:00:00 2001
From: ktabata <tabata.keiichi@lab.ntt.co.jp>
Date: Mon, 23 Jul 2018 17:08:01 +0900
Subject: [PATCH 07/25] Fixed submodule.

---
 dmlc-core | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dmlc-core b/dmlc-core
index 5abeceeaea21..e864aa6757cd 160000
--- a/dmlc-core
+++ b/dmlc-core
@@ -1 +1 @@
-Subproject commit 5abeceeaea21f9461792b130720fc215a86040f5
+Subproject commit e864aa6757cdbe78b1296fe5231fd3050b7802c3

From ed47e4afe84d7fc2ec381919fb08459b71006955 Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Wed, 25 Jul 2018 16:10:54 +0900
Subject: [PATCH 08/25] Implemented AOCLWorkspace.

---
 cmake/config.cmake                         |  6 +-
 cmake/modules/OpenCL.cmake                 |  5 +-
 docs/deploy/aocl_fpga.md                   | 76 +++++++++++++---------
 include/tvm/runtime/c_runtime_api.h        |  1 +
 python/tvm/_ffi/runtime_ctypes.py          |  2 +
 src/codegen/build_module.cc                |  3 +
 src/codegen/codegen_aocl.cc                | 54 +++++++++++++++
 src/codegen/opt/build_aocl_off.cc          | 21 ++++++
 src/pass/verify_memory.cc                  |  2 +-
 src/runtime/c_runtime_api.cc               |  1 +
 src/runtime/opencl/aocl/aocl_common.h      | 42 ++++++++++++
 src/runtime/opencl/aocl/aocl_device_api.cc | 44 +++++++++++++
 src/runtime/opencl/aocl/aocl_module.cc     | 58 +++++++++++++++++
 src/runtime/opencl/aocl/aocl_module.h      | 31 +++++++++
 src/runtime/opencl/opencl_common.h         |  4 --
 src/runtime/opencl/opencl_device_api.cc    |  4 --
 src/runtime/opencl/opencl_module.cc        | 54 +--------------
 17 files changed, 311 insertions(+), 97 deletions(-)
 create mode 100644 src/codegen/codegen_aocl.cc
 create mode 100644 src/codegen/opt/build_aocl_off.cc
 create mode 100644 src/runtime/opencl/aocl/aocl_common.h
 create mode 100644 src/runtime/opencl/aocl/aocl_device_api.cc
 create mode 100644 src/runtime/opencl/aocl/aocl_module.cc
 create mode 100644 src/runtime/opencl/aocl/aocl_module.h

diff --git a/cmake/config.cmake b/cmake/config.cmake
index 0eff4c325d3e..85c5102169a9 100644
--- a/cmake/config.cmake
+++ b/cmake/config.cmake
@@ -42,11 +42,7 @@ set(USE_ROCM OFF)
 # Whether enable SDAccel runtime
 set(USE_SDACCEL OFF)
 
-# Whether enable Intel FPGA SDK for OpenCL (AOCL) during compile,
-#
-# Possible values:
-# - OFF: disbale AOCL
-# - board_name: use specific board name for offline compilation
+# Whether enable Intel FPGA SDK for OpenCL (AOCL) runtime
 set(USE_AOCL OFF)
 
 # Whether enable OpenCL runtime
diff --git a/cmake/modules/OpenCL.cmake b/cmake/modules/OpenCL.cmake
index 22dc2223091c..947807f37209 100644
--- a/cmake/modules/OpenCL.cmake
+++ b/cmake/modules/OpenCL.cmake
@@ -21,11 +21,14 @@ endif(USE_SDACCEL)
 
 if(USE_AOCL)
   message(STATUS "Build with Intel FPGA SDK for OpenCL support")
-  add_definitions(-DAOCL_BOARD_NAME="${USE_AOCL}")
+  file(GLOB RUNTIME_SDACCEL_SRCS src/runtime/opencl/aocl/*.cc)
+  list(APPEND RUNTIME_SRCS ${RUNTIME_SDACCEL_SRCS})
   if(NOT USE_OPENCL)
     message(STATUS "Enable OpenCL support required for Intel FPGA SDK for OpenCL")
     set(USE_OPENCL ON)
   endif()
+else()
+  list(APPEND COMPILER_SRCS src/codegen/opt/build_aocl_off.cc)
 endif(USE_AOCL)
 
 if(USE_OPENCL)
diff --git a/docs/deploy/aocl_fpga.md b/docs/deploy/aocl_fpga.md
index 6ed626645d94..a73df5ab5ae8 100644
--- a/docs/deploy/aocl_fpga.md
+++ b/docs/deploy/aocl_fpga.md
@@ -5,52 +5,70 @@ TVM supports Intel FPGA SDK for OpenCL also known as AOCL.  Here is a tutorial f
 
 ***Note***: This feature is still experimental.  We cannot use AOCL to deploy an end to end neural networks for now.  In addition, we can only use AOCL's emulation mode for now.
 
-We use a python scripts for this tutorial.
+We use two python scripts for this tutorial.
 
-- emu-aocl-fpga.py
-```# -*- coding: utf-8 -*-
-import tvm
-import numpy as np
+- build.py - a script to synthesize FPGA bitstream.
+```import tvm
+
+tgt_host="llvm"
+tgt="aocl -device=de5net_a7"
+
+n = tvm.var("n")
+A = tvm.placeholder((n,), name='A')
+B = tvm.placeholder((n,), name='B')
+C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
+
+s = tvm.create_schedule(C.op)
+px, x = s[C].split(C.op.axis[0], nparts=1)
+
+s[C].bind(px, tvm.thread_axis("pipeline"))
+
+fadd = tvm.build(s, [A, B, C], tgt, target_host=tgt_host, name="myadd")
 
-tgt_host = 'llvm'
-tgt = 'opencl'
+fadd.save("myadd.o")
 
-# Define a computation.
-n = tvm.var('n')
-a = tvm.placeholder((n,), name='a')
-b = tvm.placeholder((n,), name='b')
-c = tvm.compute(a.shape, lambda i: a[i] + b[i], name='c')
+tvm.contrib.cc.create_shared("myadd.so", ["myadd.o"])
+```
 
-# Make a schedule.
-s = tvm.create_schedule(c.op)
-px, x = s[c].split(c.op.axis[0], nparts=1)
-s[c].bind(px, tvm.thread_axis("pipeline"))
+- run.py - a script to use FPGA as an accelerator.
+```python
+import tvm
+import numpy as np
+import os
 
-# Make a executable code.
-fadd = tvm.build(s, [a, b, c], tgt, target_host=tgt_host, name='myadd')
+tgt="aocl -device=de5net_a7"
+
+fadd = tvm.module.load("myadd.so")
+fadd_dev = tvm.module.load("myadd.aocx")
+fadd.import_module(fadd_dev)
 
-# Run.
 ctx = tvm.context(tgt, 0)
+
 n = 1024
-a = tvm.nd.array(np.random.uniform(size=n).astype(a.dtype), ctx)
-b = tvm.nd.array(np.random.uniform(size=n).astype(b.dtype), ctx)
-c = tvm.nd.array(np.zeros(n, dtype=c.dtype), ctx)
-fadd(a, b, c)```
+a = tvm.nd.array(np.random.uniform(size=n).astype("float32"), ctx)
+b = tvm.nd.array(np.random.uniform(size=n).astype("float32"), ctx)
+c = tvm.nd.array(np.zeros(n, dtype="float32"), ctx)
+
+fadd(a, b, c)
+np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
+```
 
 Setup
 -----
 
 - Install AOCL 17.1 on Ubuntu 16.04.4 LTS.
 - Install FPGA device driver.
-- Make ICD file.
-- Make FCD file.
+- Make ICD file. (/etc/OpenCL/vendors/Altera.icd)
+- Make FCD file. (/opt/Intel/OpenCL/Boards/de5net.fcd)
 - Setup TVM with AOCL and OpenCL enabled.
 
 Emulation
 ---------
 
-- Set environment variable.
-```export CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA=1```
-
 - Run software emulation
-```python emu-aocl-fpga.py```
+```bash
+export CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA=1
+
+python build.py
+python run.py
+```
diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h
index 6fb7a0f3f8b3..17d00bf479aa 100644
--- a/include/tvm/runtime/c_runtime_api.h
+++ b/include/tvm/runtime/c_runtime_api.h
@@ -60,6 +60,7 @@ typedef int64_t tvm_index_t;
 
 /*! \brief Extension device types in TVM */
 typedef enum {
+  kDLAOCL = 5,
   kDLSDAccel = 6,
   kDLVulkan = 7,
   kOpenGL = 11,
diff --git a/python/tvm/_ffi/runtime_ctypes.py b/python/tvm/_ffi/runtime_ctypes.py
index 612b54649d74..4c36e82a81ec 100644
--- a/python/tvm/_ffi/runtime_ctypes.py
+++ b/python/tvm/_ffi/runtime_ctypes.py
@@ -96,6 +96,7 @@ class TVMContext(ctypes.Structure):
         1 : 'cpu',
         2 : 'gpu',
         4 : 'opencl',
+        5 : 'aocl',
         6 : 'sdaccel',
         7 : 'vulkan',
         8 : 'metal',
@@ -113,6 +114,7 @@ class TVMContext(ctypes.Structure):
         'nvptx': 2,
         'cl': 4,
         'opencl': 4,
+        'aocl' : 5,
         'sdaccel': 6,
         'vulkan': 7,
         'metal': 8,
diff --git a/src/codegen/build_module.cc b/src/codegen/build_module.cc
index f9b6226f86c4..0252f0a1e3a9 100644
--- a/src/codegen/build_module.cc
+++ b/src/codegen/build_module.cc
@@ -91,6 +91,9 @@ Target CreateTarget(const std::string& target_name,
   } else if (target_name == "sdaccel") {
     t->device_type = kDLOpenCL;
     t->keys_array.push_back(ir::StringImm::make("sdaccel"));
+  } else if (target_name == "aocl") {
+    t->device_type = kDLOpenCL;
+    t->keys_array.push_back(ir::StringImm::make("aocl"));
   } else if (target_name == "opengl") {
     t->device_type = kOpenGL;
     t->keys_array.push_back(ir::StringImm::make("opengl"));
diff --git a/src/codegen/codegen_aocl.cc b/src/codegen/codegen_aocl.cc
new file mode 100644
index 000000000000..73056119a84d
--- /dev/null
+++ b/src/codegen/codegen_aocl.cc
@@ -0,0 +1,54 @@
+/*!
+ *  Copyright (c) 2018 by Contributors
+ * \file codegen_aocl.cc
+ */
+#include <tvm/build_module.h>
+#include <vector>
+#include <string>
+#include "./codegen_opencl.h"
+#include "./build_common.h"
+#include "../runtime/opencl/aocl/aocl_module.h"
+#include "../runtime/file_util.h"
+
+namespace tvm {
+namespace codegen {
+
+runtime::Module BuildAOCL(Array<LoweredFunc> funcs, std::string target_str) {
+  // Get code.
+  using tvm::runtime::Registry;
+  bool output_ssa = false;
+  CodeGenOpenCL cg;
+  cg.Init(output_ssa);
+  for (LoweredFunc f : funcs) {
+    cg.AddFunction(f);
+  }
+  std::string code = cg.Finish();
+  if (const auto* f = Registry::Get("tvm_callback_opencl_postproc")) {
+    code = (*f)(code).operator std::string();
+  }
+
+  // Write a .cl file.
+  runtime::SaveBinaryToFile("aocl.cl", code.c_str());
+
+  // Compile the .cl file.
+  Target target = Target::create(target_str);
+  std::string cmd = "aoc aocl.cl -march=emulator -board=";
+  cmd += target->device_name;
+  if (system(cmd.c_str()) != 0) {
+    LOG(FATAL) << "OpenCL offline compilation error.";
+  }
+
+  // Read .aocx file
+  std::string aocxbin;
+  runtime::LoadBinaryFromFile("aocl.aocx", &aocxbin);
+
+  return AOCLModuleCreate(aocxbin, "aocx", ExtractFuncInfo(funcs), code);
+}
+
+TVM_REGISTER_API("codegen.build_aocl")
+.set_body([](TVMArgs args, TVMRetValue* rv) {
+    *rv = BuildAOCL(args[0], args[1]);
+  });
+
+}  // namespace codegen
+}  // namespace tvm
diff --git a/src/codegen/opt/build_aocl_off.cc b/src/codegen/opt/build_aocl_off.cc
new file mode 100644
index 000000000000..535036016247
--- /dev/null
+++ b/src/codegen/opt/build_aocl_off.cc
@@ -0,0 +1,21 @@
+/*!
+ *  Copyright (c) 2018 by Contributors
+ *  Optional module used when the AOCL build is switched off
+ */
+#include "../codegen_source_base.h"
+#include "../../runtime/opencl/opencl_module.h"
+
+namespace tvm {
+namespace runtime {
+
+Module AOCLModuleCreate(
+    std::string data,
+    std::string fmt,
+    std::unordered_map<std::string, FunctionInfo> fmap,
+    std::string source) {
+  LOG(WARNING) << "AOCL runtime not enabled, return a source module...";
+  return codegen::DeviceSourceModuleCreate(data, fmt, fmap, "aocl");
+}
+
+}  // namespace runtime
+}  // namespace tvm
diff --git a/src/pass/verify_memory.cc b/src/pass/verify_memory.cc
index 38cd05a4bdd0..bafaaa642ac7 100644
--- a/src/pass/verify_memory.cc
+++ b/src/pass/verify_memory.cc
@@ -145,7 +145,7 @@ class MemoryAccessVerifier final : protected IRVisitor {
   }
   /// Check if a given DLDeviceType/TVMDeviceExtType value denotes FPGA device.
   static bool IsFPGADevice(int dev_type) {
-    return kDLSDAccel == dev_type;
+    return kDLSDAccel == dev_type || kDLAOCL == dev_type;
   }
 
  private:
diff --git a/src/runtime/c_runtime_api.cc b/src/runtime/c_runtime_api.cc
index 7a7d7ab9f4db..916dfadecb4c 100644
--- a/src/runtime/c_runtime_api.cc
+++ b/src/runtime/c_runtime_api.cc
@@ -32,6 +32,7 @@ inline std::string DeviceName(int type) {
     case kDLGPU: return "gpu";
     case kDLOpenCL: return "opencl";
     case kDLSDAccel: return "sdaccel";
+    case kDLAOCL: return "aocl";
     case kDLVulkan: return "vulkan";
     case kDLMetal: return "metal";
     case kDLVPI: return "vpi";
diff --git a/src/runtime/opencl/aocl/aocl_common.h b/src/runtime/opencl/aocl/aocl_common.h
new file mode 100644
index 000000000000..81da940076e9
--- /dev/null
+++ b/src/runtime/opencl/aocl/aocl_common.h
@@ -0,0 +1,42 @@
+/*!
+ *  Copyright (c) 2018 by Contributors
+ * \file sdaccel_common.h
+ * \brief SDAccel common header
+ */
+#ifndef TVM_RUNTIME_OPENCL_AOCL_AOCL_COMMON_H_
+#define TVM_RUNTIME_OPENCL_AOCL_AOCL_COMMON_H_
+
+#include "../opencl_common.h"
+
+namespace tvm {
+namespace runtime {
+namespace cl {
+
+/*!
+ * \brief Process global AOCL workspace.
+ */
+class AOCLWorkspace final : public OpenCLWorkspace {
+ public:
+  // override OpenCL device API
+  void Init() final;
+  bool IsOpenCLDevice(TVMContext ctx) final;
+  OpenCLThreadEntry* GetThreadEntry() final;
+  // get the global workspace
+  static const std::shared_ptr<OpenCLWorkspace>& Global();
+};
+
+
+/*! \brief Thread local workspace for AOCL */
+class AOCLThreadEntry : public OpenCLThreadEntry {
+ public:
+  // constructor
+  AOCLThreadEntry()
+      : OpenCLThreadEntry(static_cast<DLDeviceType>(kDLAOCL), AOCLWorkspace::Global()) {}
+
+  // get the thread local entry
+  static AOCLThreadEntry* ThreadLocal();
+};
+}  // namespace cl
+}  // namespace runtime
+}  // namespace tvm
+#endif  // TVM_RUNTIME_OPENCL_AOCL_AOCL_COMMON_H_
diff --git a/src/runtime/opencl/aocl/aocl_device_api.cc b/src/runtime/opencl/aocl/aocl_device_api.cc
new file mode 100644
index 000000000000..5e724a244f25
--- /dev/null
+++ b/src/runtime/opencl/aocl/aocl_device_api.cc
@@ -0,0 +1,44 @@
+/*!
+ *  Copyright (c) 2018 by Contributors
+ * \file aocl_device_api.cc
+ */
+#include <tvm/runtime/registry.h>
+#include <dmlc/thread_local.h>
+#include "./aocl_common.h"
+
+namespace tvm {
+namespace runtime {
+namespace cl {
+
+OpenCLThreadEntry* AOCLWorkspace::GetThreadEntry() {
+  return AOCLThreadEntry::ThreadLocal();
+}
+
+const std::shared_ptr<OpenCLWorkspace>& AOCLWorkspace::Global() {
+  static std::shared_ptr<OpenCLWorkspace> inst = std::make_shared<AOCLWorkspace>();
+  return inst;
+}
+
+void AOCLWorkspace::Init() {
+  OpenCLWorkspace::Init("aocl", "accelerator", "Intel");
+}
+
+bool AOCLWorkspace::IsOpenCLDevice(TVMContext ctx) {
+  return ctx.device_type == static_cast<DLDeviceType>(kDLAOCL);
+}
+
+typedef dmlc::ThreadLocalStore<AOCLThreadEntry> AOCLThreadStore;
+
+AOCLThreadEntry* AOCLThreadEntry::ThreadLocal() {
+  return AOCLThreadStore::Get();
+}
+
+TVM_REGISTER_GLOBAL("device_api.aocl")
+.set_body([](TVMArgs args, TVMRetValue* rv) {
+    DeviceAPI* ptr = AOCLWorkspace::Global().get();
+    *rv = static_cast<void*>(ptr);
+  });
+
+}  // namespace cl
+}  // namespace runtime
+}  // namespace tvm
diff --git a/src/runtime/opencl/aocl/aocl_module.cc b/src/runtime/opencl/aocl/aocl_module.cc
new file mode 100644
index 000000000000..a056c5cee671
--- /dev/null
+++ b/src/runtime/opencl/aocl/aocl_module.cc
@@ -0,0 +1,58 @@
+/*!
+ *  Copyright (c) 2018 by Contributors
+ * \file aocl_module.cc
+ */
+#include <dmlc/memory_io.h>
+#include <tvm/runtime/registry.h>
+#include <vector>
+#include <string>
+#include <unordered_map>
+#include "./aocl_common.h"
+#include "./aocl_module.h"
+
+namespace tvm {
+namespace runtime {
+
+class AOCLModuleNode : public OpenCLModuleNode {
+ public:
+  explicit AOCLModuleNode(std::string data,
+                          std::string fmt,
+                          std::unordered_map<std::string, FunctionInfo> fmap,
+                          std::string source)
+      : OpenCLModuleNode(data, fmt, fmap, source) {}
+  const std::shared_ptr<cl::OpenCLWorkspace>& GetGlobalWorkspace() final;
+};
+
+const std::shared_ptr<cl::OpenCLWorkspace>& AOCLModuleNode::GetGlobalWorkspace() {
+  return cl::AOCLWorkspace::Global();
+}
+
+Module AOCLModuleCreate(
+    std::string data,
+    std::string fmt,
+    std::unordered_map<std::string, FunctionInfo> fmap,
+    std::string source) {
+  std::shared_ptr<AOCLModuleNode> n =
+      std::make_shared<AOCLModuleNode>(data, fmt, fmap, source);
+  n->Init();
+  return Module(n);
+}
+
+Module AOCLModuleLoadFile(const std::string& file_name,
+                          const std::string& format) {
+  std::string data;
+  std::unordered_map<std::string, FunctionInfo> fmap;
+  std::string fmt = GetFileFormat(file_name, format);
+  std::string meta_file = GetMetaFilePath(file_name);
+  LoadBinaryFromFile(file_name, &data);
+  LoadMetaDataFromFile(meta_file, &fmap);
+  return AOCLModuleCreate(data, fmt, fmap, std::string());
+}
+
+TVM_REGISTER_GLOBAL("module.loadfile_aocx")
+.set_body([](TVMArgs args, TVMRetValue* rv) {
+    *rv = AOCLModuleLoadFile(args[0], args[1]);
+  });
+
+}  // namespace runtime
+}  // namespace tvm
diff --git a/src/runtime/opencl/aocl/aocl_module.h b/src/runtime/opencl/aocl/aocl_module.h
new file mode 100644
index 000000000000..fba9be713486
--- /dev/null
+++ b/src/runtime/opencl/aocl/aocl_module.h
@@ -0,0 +1,31 @@
+/*!
+ *  Copyright (c) 2018 by Contributors
+ * \file sdaccel_module.h
+ * \brief Execution handling of OPENCL kernels for SDAccel FPGAs
+ */
+#ifndef TVM_RUNTIME_OPENCL_AOCL_AOCL_MODULE_H_
+#define TVM_RUNTIME_OPENCL_AOCL_AOCL_MODULE_H_
+
+#include <tvm/runtime/packed_func.h>
+#include <memory>
+#include <vector>
+#include <string>
+#include "../../meta_data.h"
+
+namespace tvm {
+namespace runtime {
+/*!
+ * \brief Create an OpenCL module for AOCL from data.
+ * \param data The module data.
+ * \param fmt The format of the data, can be "aocx"
+ * \param fmap The function information map of each function.
+ * \param source The generated OpenCL source of the module; may be empty.
+ */
+Module AOCLModuleCreate(
+    std::string data,
+    std::string fmt,
+    std::unordered_map<std::string, FunctionInfo> fmap,
+    std::string source);
+}  // namespace runtime
+}  // namespace tvm
+#endif  // TVM_RUNTIME_OPENCL_AOCL_AOCL_MODULE_H_
diff --git a/src/runtime/opencl/opencl_common.h b/src/runtime/opencl/opencl_common.h
index 12dd0db1336b..9e53de8dda67 100644
--- a/src/runtime/opencl/opencl_common.h
+++ b/src/runtime/opencl/opencl_common.h
@@ -143,11 +143,7 @@ class OpenCLWorkspace : public DeviceAPI {
   void Init(const std::string& type_key, const std::string& device_type,
             const std::string& platform_name = "");
   virtual void Init() {
-#ifndef AOCL_BOARD_NAME
     Init("opencl", "gpu");
-#else
-    Init("opencl", "accelerator");
-#endif
   }
   // Check whether the context is OpenCL or not.
   virtual bool IsOpenCLDevice(TVMContext ctx) {
diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc
index 5a724abefbf0..ac9373f1375b 100644
--- a/src/runtime/opencl/opencl_device_api.cc
+++ b/src/runtime/opencl/opencl_device_api.cc
@@ -208,11 +208,7 @@ std::vector<cl_device_id> GetDeviceIDs(
   cl_device_type dtype = CL_DEVICE_TYPE_ALL;
   if (device_type == "cpu") dtype = CL_DEVICE_TYPE_CPU;
   if (device_type == "gpu") dtype = CL_DEVICE_TYPE_GPU;
-#ifndef AOCL_BOARD_NAME
   if (device_type == "accelerator") dtype = CL_DEVICE_TYPE_ACCELERATOR;
-#else
-  if (device_type == "accelerator") dtype = CL_DEVICE_TYPE_DEFAULT;
-#endif
   cl_uint ret_size;
   cl_int code = clGetDeviceIDs(pid, dtype, 0, nullptr, &ret_size);
   std::vector<cl_device_id> ret;
diff --git a/src/runtime/opencl/opencl_module.cc b/src/runtime/opencl/opencl_module.cc
index b1a0f77aa62c..3262a967a883 100644
--- a/src/runtime/opencl/opencl_module.cc
+++ b/src/runtime/opencl/opencl_module.cc
@@ -187,17 +187,13 @@ cl_kernel OpenCLModuleNode::InstallKernel(cl::OpenCLWorkspace* w,
     // create program
     if (fmt_ == "cl") {
       if (program_ == nullptr) {
-#ifndef AOCL_BOARD_NAME
         const char* s = data_.c_str();
         size_t len = data_.length();
         cl_int err;
         program_ = clCreateProgramWithSource(w->context, 1, &s, &len, &err);
         OPENCL_CHECK_ERROR(err);
-#else
-        OfflineCompile(w, t);
-#endif
       }
-    } else if (fmt_ == "xclbin" || fmt_ == "awsxclbin") {
+    } else if (fmt_ == "xclbin" || fmt_ == "awsxclbin" || fmt_ == "aocx") {
       const unsigned char* s = (const unsigned char *)data_.c_str();
       size_t len = data_.length();
       cl_int err;
@@ -233,54 +229,6 @@ cl_kernel OpenCLModuleNode::InstallKernel(cl::OpenCLWorkspace* w,
   return kernel;
 }
 
-#ifdef AOCL_BOARD_NAME
-void OpenCLModuleNode::OfflineCompile(cl::OpenCLWorkspace* w,
-                                      cl::OpenCLThreadEntry* t) {
-    // Write a .cl file.
-    std::ofstream ofs("aocltmp.cl");
-    if (!ofs) {
-      LOG(FATAL) << "Can't create OpenCL temporary file.";
-    }
-    ofs << data_.c_str();
-    if (!ofs) {
-      LOG(FATAL) << "Can't write to OpenCL temporary file.";
-    }
-    ofs.close();
-
-    // Compile the .cl file.
-    std::string cmd = "aoc aocltmp.cl -march=emulator -board=";
-    cmd += AOCL_BOARD_NAME;
-    if (system(cmd.c_str()) != 0) {
-      LOG(FATAL) << "OpenCL offline compilation error.";
-    }
-
-    // Read .aocx file
-    std::ifstream ifs("aocltmp.aocx", std::ios::in | std::ios::binary);
-    if (!ifs) {
-      LOG(FATAL) << "Can't open aocltmp.aocx file.";
-    }
-    ifs.seekg(0, std::fstream::end);
-    const size_t len = ifs.tellg();
-    char *buf = new char[len];
-    ifs.clear();
-    ifs.seekg(0, std::fstream::beg);
-    ifs.read(buf, len);
-    if (!ifs) {
-      LOG(FATAL) << "Can't read aocltmp.aocx file.";
-    }
-
-    // Create program from aocx.
-    cl_int err;
-    int device_id = t->context.device_id;
-    cl_device_id dev = w->devices[device_id];
-    const unsigned char* s = (const unsigned char *)buf;
-    program_ = clCreateProgramWithBinary(w->context, 1, &dev, &len, &s, NULL, &err);
-    OPENCL_CHECK_ERROR(err);
-
-    delete[] buf;
-}
-#endif
-
 Module OpenCLModuleCreate(
     std::string data,
     std::string fmt,

From d51f19e8f8523f75bb71b2773704f37e4131df72 Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Wed, 25 Jul 2018 16:17:22 +0900
Subject: [PATCH 09/25] Fixed document.

---
 docs/deploy/aocl_fpga.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/deploy/aocl_fpga.md b/docs/deploy/aocl_fpga.md
index a73df5ab5ae8..387b04baf55a 100644
--- a/docs/deploy/aocl_fpga.md
+++ b/docs/deploy/aocl_fpga.md
@@ -26,6 +26,7 @@ s[C].bind(px, tvm.thread_axis("pipeline"))
 fadd = tvm.build(s, [A, B, C], tgt, target_host=tgt_host, name="myadd")
 
 fadd.save("myadd.o")
+fadd.imported_modules[0].save("myadd.aocx")
 
 tvm.contrib.cc.create_shared("myadd.so", ["myadd.o"])
 )```

From 2c18ee09bf514fadfff3418c163b21cf7132a321 Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Wed, 25 Jul 2018 16:29:46 +0900
Subject: [PATCH 10/25] Fixed document.

---
 docs/deploy/aocl_fpga.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/deploy/aocl_fpga.md b/docs/deploy/aocl_fpga.md
index 387b04baf55a..e46ed8f13a91 100644
--- a/docs/deploy/aocl_fpga.md
+++ b/docs/deploy/aocl_fpga.md
@@ -67,8 +67,7 @@ Emulation
 ---------
 
 - Run software emulation
-```bash
-export CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA=1
+```export CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA=1
 
 python build.py
 python run.py

From fd3d4fae884c8a76fcbb714129df542f3c6c42b7 Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Wed, 25 Jul 2018 16:33:49 +0900
Subject: [PATCH 11/25] Deleted macro.

---
 src/runtime/opencl/opencl_common.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/runtime/opencl/opencl_common.h b/src/runtime/opencl/opencl_common.h
index 9e53de8dda67..c37dbaa94d7a 100644
--- a/src/runtime/opencl/opencl_common.h
+++ b/src/runtime/opencl/opencl_common.h
@@ -284,10 +284,6 @@ class OpenCLModuleNode : public ModuleNode {
   std::unordered_map<std::string, KTRefEntry> kid_map_;
   // kernels build so far.
   std::vector<cl_kernel> kernels_;
-
-#ifdef AOCL_BOARD_NAME
-  void OfflineCompile(cl::OpenCLWorkspace* w,  cl::OpenCLThreadEntry* t);
-#endif
 };
 
 }  // namespace runtime

From 5b578e22dc00bcea44984b29fbb79c36ec457859 Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Wed, 25 Jul 2018 16:35:47 +0900
Subject: [PATCH 12/25] Fixed file header.

---
 src/runtime/opencl/aocl/aocl_common.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/runtime/opencl/aocl/aocl_common.h b/src/runtime/opencl/aocl/aocl_common.h
index 81da940076e9..234053098d1d 100644
--- a/src/runtime/opencl/aocl/aocl_common.h
+++ b/src/runtime/opencl/aocl/aocl_common.h
@@ -1,7 +1,7 @@
 /*!
  *  Copyright (c) 2018 by Contributors
- * \file sdaccel_common.h
- * \brief SDAccel common header
+ * \file aocl_common.h
+ * \brief AOCL common header
  */
 #ifndef TVM_RUNTIME_OPENCL_AOCL_AOCL_COMMON_H_
 #define TVM_RUNTIME_OPENCL_AOCL_AOCL_COMMON_H_

From 66a46ea115af75c827696473569f72fcea4e6ae5 Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Wed, 25 Jul 2018 16:38:04 +0900
Subject: [PATCH 13/25] Fixed file header.

---
 src/runtime/opencl/aocl/aocl_module.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/runtime/opencl/aocl/aocl_module.h b/src/runtime/opencl/aocl/aocl_module.h
index fba9be713486..83ddbdb358ce 100644
--- a/src/runtime/opencl/aocl/aocl_module.h
+++ b/src/runtime/opencl/aocl/aocl_module.h
@@ -1,7 +1,7 @@
 /*!
  *  Copyright (c) 2018 by Contributors
- * \file sdaccel_module.h
- * \brief Execution handling of OPENCL kernels for SDAccel FPGAs
+ * \file aocl_module.h
+ * \brief Execution handling of OpenCL kernels for AOCL
  */
 #ifndef TVM_RUNTIME_OPENCL_AOCL_AOCL_MODULE_H_
 #define TVM_RUNTIME_OPENCL_AOCL_AOCL_MODULE_H_

From fb8036655ca47080bfb4a892cab8d11b2955649c Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Wed, 25 Jul 2018 16:39:15 +0900
Subject: [PATCH 14/25] Deleted includes.

---
 src/runtime/opencl/opencl_module.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/runtime/opencl/opencl_module.cc b/src/runtime/opencl/opencl_module.cc
index 3262a967a883..3efd789513ba 100644
--- a/src/runtime/opencl/opencl_module.cc
+++ b/src/runtime/opencl/opencl_module.cc
@@ -6,8 +6,6 @@
 #include <tvm/runtime/registry.h>
 #include <vector>
 #include <string>
-#include <iostream>
-#include <fstream>
 #include <unordered_map>
 #include "./opencl_common.h"
 #include "./opencl_module.h"

From 0030c81a12e84ceb6bf4856483c24dc7c9aee74e Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Wed, 25 Jul 2018 16:41:06 +0900
Subject: [PATCH 15/25] Fixed OpenCL.cmake

---
 cmake/modules/OpenCL.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cmake/modules/OpenCL.cmake b/cmake/modules/OpenCL.cmake
index 947807f37209..b30df1864522 100644
--- a/cmake/modules/OpenCL.cmake
+++ b/cmake/modules/OpenCL.cmake
@@ -21,8 +21,8 @@ endif(USE_SDACCEL)
 
 if(USE_AOCL)
   message(STATUS "Build with Intel FPGA SDK for OpenCL support")
-  file(GLOB RUNTIME_SDACCEL_SRCS src/runtime/opencl/aocl/*.cc)
-  list(APPEND RUNTIME_SRCS ${RUNTIME_SDACCEL_SRCS})
+  file(GLOB RUNTIME_AOCL_SRCS src/runtime/opencl/aocl/*.cc)
+  list(APPEND RUNTIME_SRCS ${RUNTIME_AOCL_SRCS})
   if(NOT USE_OPENCL)
     message(STATUS "Enable OpenCL support required for Intel FPGA SDK for OpenCL")
     set(USE_OPENCL ON)

From 65ad10d75fac078fe884919d6c6f22b727680ad9 Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Wed, 25 Jul 2018 17:27:44 +0900
Subject: [PATCH 16/25] Fixed platform name for AOCL.

---
 src/runtime/opencl/aocl/aocl_device_api.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/runtime/opencl/aocl/aocl_device_api.cc b/src/runtime/opencl/aocl/aocl_device_api.cc
index 5e724a244f25..e9cbc6b4cda0 100644
--- a/src/runtime/opencl/aocl/aocl_device_api.cc
+++ b/src/runtime/opencl/aocl/aocl_device_api.cc
@@ -20,7 +20,7 @@ const std::shared_ptr<OpenCLWorkspace>& AOCLWorkspace::Global() {
 }
 
 void AOCLWorkspace::Init() {
-  OpenCLWorkspace::Init("aocl", "accelerator", "Intel");
+  OpenCLWorkspace::Init("aocl", "accelerator", "Intel(R) FPGA SDK for OpenCL(TM)");
 }
 
 bool AOCLWorkspace::IsOpenCLDevice(TVMContext ctx) {

From d3ab18aec08efef102f86c67312ee7a8a4687d2e Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Wed, 25 Jul 2018 17:28:14 +0900
Subject: [PATCH 17/25] Fixed device type.

---
 src/codegen/build_module.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/codegen/build_module.cc b/src/codegen/build_module.cc
index 0252f0a1e3a9..49b86626add0 100644
--- a/src/codegen/build_module.cc
+++ b/src/codegen/build_module.cc
@@ -92,7 +92,7 @@ Target CreateTarget(const std::string& target_name,
     t->device_type = kDLOpenCL;
     t->keys_array.push_back(ir::StringImm::make("sdaccel"));
   } else if (target_name == "aocl") {
-    t->device_type = kDLOpenCL;
+    t->device_type = kDLAOCL;
     t->keys_array.push_back(ir::StringImm::make("aocl"));
   } else if (target_name == "opengl") {
     t->device_type = kOpenGL;

From 3a49eee2177c260e4797b78a9ae077f04375116e Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Wed, 25 Jul 2018 18:00:22 +0900
Subject: [PATCH 18/25] Fixed document.

---
 docs/deploy/aocl_fpga.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/docs/deploy/aocl_fpga.md b/docs/deploy/aocl_fpga.md
index e46ed8f13a91..4eaa8d8ec745 100644
--- a/docs/deploy/aocl_fpga.md
+++ b/docs/deploy/aocl_fpga.md
@@ -58,9 +58,12 @@ Setup
 -----
 
 - Install AOCL 17.1 on Ubuntu 16.04.4 LTS.
+- Install BSP for your FPGA device.
 - Install FPGA device driver.
-- Make ICD file. (/etc/OpenCL/vendors/Altera.icd)
-- Make FCD file. (/opt/Intel/OpenCL/Boards/de5net.fcd)
+- Create an ICD file at /etc/OpenCL/vendors/Altera.icd so that the OpenCL platform can be found.
+```/opt/intelFPGA/17.1/hld/linux64/lib/libalteracl.so```
+- Create an FCD file for example at /opt/Intel/OpenCL/Boards/de5net.fcd so that your FPGA device can be found.
+```/opt/intelFPGA/17.1/hld/board/terasic/linux64/lib/libterasic_apb_16_0_mmd.so```
 - Setup TVM with AOCL and OpenCL enabled.
 
 Emulation

From 93d8d9fa8881aa0abc4302ef77fbeb48998c51d2 Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Wed, 25 Jul 2018 18:17:46 +0900
Subject: [PATCH 19/25] Added -mattr=emulator option.

---
 docs/deploy/aocl_fpga.md    |  4 ++--
 src/codegen/codegen_aocl.cc | 10 ++++++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/docs/deploy/aocl_fpga.md b/docs/deploy/aocl_fpga.md
index 4eaa8d8ec745..dc65d4a0daf7 100644
--- a/docs/deploy/aocl_fpga.md
+++ b/docs/deploy/aocl_fpga.md
@@ -3,7 +3,7 @@ AOCL Backend Example
 
 TVM supports Intel FPGA SDK for OpenCL also known as AOCL.  Here is a tutorial for how to use TVM with AOCL.
 
-***Note***: This feature is still experimental.  We cannot use AOCL to deploy an end to end neural networks for now.  In addition, we can only use AOCL's emulation mode for now.
+***Note***: This feature is still experimental.  We cannot use AOCL to deploy an end to end neural networks for now.  For now, we only tested compilation for emulation mode of AOCL.
 
 We use two python scripts for this tutorial.
 
@@ -37,7 +37,7 @@ import tvm
 import numpy as np
 import os
 
-tgt="aocl -device=de5net_a7"
+tgt="aocl -device=de5net_a7 -mattr=emulator"
 
 fadd = tvm.module.load("myadd.so")
 fadd_dev = tvm.module.load("myadd.aocx")
diff --git a/src/codegen/codegen_aocl.cc b/src/codegen/codegen_aocl.cc
index 73056119a84d..7b8fca239a72 100644
--- a/src/codegen/codegen_aocl.cc
+++ b/src/codegen/codegen_aocl.cc
@@ -32,8 +32,14 @@ runtime::Module BuildAOCL(Array<LoweredFunc> funcs, std::string target_str) {
 
   // Compile the .cl file.
   Target target = Target::create(target_str);
-  std::string cmd = "aoc aocl.cl -march=emulator -board=";
-  cmd += target->device_name;
+  if (target->device_name == "") {
+    LOG(FATAL) << "AOCL device name not specified in build target.";
+  }
+  std::string cmd = "aoc aocl.cl";
+  if (target_str.find("-mattr=emulator") != std::string::npos) {
+    cmd += " -march=emulator";
+  }
+  cmd += " -board=" + target->device_name;
   if (system(cmd.c_str()) != 0) {
     LOG(FATAL) << "OpenCL offline compilation error.";
   }

From b174098590381796291954eee38136a8a40f59c5 Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Wed, 25 Jul 2018 22:00:56 +0900
Subject: [PATCH 20/25] Fixed documentation.

---
 docs/deploy/aocl_fpga.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/deploy/aocl_fpga.md b/docs/deploy/aocl_fpga.md
index dc65d4a0daf7..7e5a5e10331d 100644
--- a/docs/deploy/aocl_fpga.md
+++ b/docs/deploy/aocl_fpga.md
@@ -11,7 +11,7 @@ We use two python scripts for this tutorial.
 ```import tvm
 
 tgt_host="llvm"
-tgt="aocl -device=de5net_a7"
+tgt="aocl -device=de5net_a7 -mattr=emulator"
 
 n = tvm.var("n")
 A = tvm.placeholder((n,), name='A')

From 68569d9f58e96a4d2726b6386b5fee4a7cee1524 Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Wed, 25 Jul 2018 22:16:20 +0900
Subject: [PATCH 21/25] Fixed documentation.

---
 docs/deploy/aocl_fpga.md | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/docs/deploy/aocl_fpga.md b/docs/deploy/aocl_fpga.md
index 7e5a5e10331d..73637c43110b 100644
--- a/docs/deploy/aocl_fpga.md
+++ b/docs/deploy/aocl_fpga.md
@@ -8,7 +8,8 @@ TVM supports Intel FPGA SDK for OpenCL also known as AOCL.  Here is a tutorial f
 We use two python scripts for this tutorial.
 
 - build.py - a script to synthesize FPGA bitstream.
-```import tvm
+```
+import tvm
 
 tgt_host="llvm"
 tgt="aocl -device=de5net_a7 -mattr=emulator"
@@ -29,10 +30,10 @@ fadd.save("myadd.o")
 fadd.imported_modules[0].save("myadd.aocx")
 
 tvm.contrib.cc.create_shared("myadd.so", ["myadd.o"])
-)```
+```
 
 - run.py - a script to use FPGA as an accelerator.
-```python
+```
 import tvm
 import numpy as np
 import os
@@ -61,9 +62,13 @@ Setup
 - Install BSP for your FPGA device.
 - Install FPGA device driver.
 - Create an ICD file at /etc/OpenCL/vendors/Altera.icd so that the OpenCL platform can be found.
-```/opt/intelFPGA/17.1/hld/linux64/lib/libalteracl.so```
+```
+/opt/intelFPGA/17.1/hld/linux64/lib/libalteracl.so
+```
 - Create an FCD file for example at /opt/Intel/OpenCL/Boards/de5net.fcd so that your FPGA device can be found.
-```/opt/intelFPGA/17.1/hld/board/terasic/linux64/lib/libterasic_apb_16_0_mmd.so```
+```
+/opt/intelFPGA/17.1/hld/board/terasic/linux64/lib/libterasic_apb_16_0_mmd.so
+```
 - Setup TVM with AOCL and OpenCL enabled.
 
 Emulation
@@ -75,3 +80,12 @@ Emulation
 python build.py
 python run.py
 ```
+
+- Run on FPGA devices (not tested)
+    - Change tgt value to "aocl -device=de5net_a7" on build.py and run.py
+```
+unset CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA
+
+python build.py
+python run.py
+```

From d4d435f5a8e2b40570475c8e46e2462ae0548ba9 Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Wed, 25 Jul 2018 22:17:34 +0900
Subject: [PATCH 22/25] Fixed documentation.

---
 docs/deploy/aocl_fpga.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/deploy/aocl_fpga.md b/docs/deploy/aocl_fpga.md
index 73637c43110b..85f6a2b4e76f 100644
--- a/docs/deploy/aocl_fpga.md
+++ b/docs/deploy/aocl_fpga.md
@@ -3,7 +3,7 @@ AOCL Backend Example
 
 TVM supports Intel FPGA SDK for OpenCL also known as AOCL.  Here is a tutorial for how to use TVM with AOCL.
 
-***Note***: This feature is still experimental.  We cannot use AOCL to deploy an end to end neural networks for now.  For now, we only tested compilation for emulation mode of AOCL.
+***Note***: This feature is still experimental.  We cannot use AOCL to deploy end-to-end neural networks for now.  In addition, we have only tested compilation for the emulation mode of AOCL.
 
 We use two python scripts for this tutorial.
 

From 9d45ead569fc056a8a7fc3cd6996a07468884980 Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Wed, 25 Jul 2018 22:19:31 +0900
Subject: [PATCH 23/25] Fixed documentation.

---
 docs/deploy/aocl_fpga.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/deploy/aocl_fpga.md b/docs/deploy/aocl_fpga.md
index 85f6a2b4e76f..71be23d93a4d 100644
--- a/docs/deploy/aocl_fpga.md
+++ b/docs/deploy/aocl_fpga.md
@@ -75,7 +75,8 @@ Emulation
 ---------
 
 - Run software emulation
-```export CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA=1
+```
+export CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA=1
 
 python build.py
 python run.py

From 8ff0272c5114f44318ba619675968e67fd5dacc0 Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Fri, 27 Jul 2018 13:44:25 +0900
Subject: [PATCH 24/25] Use s5_ref for target device.

---
 docs/deploy/aocl_fpga.md    | 10 +++++-----
 src/codegen/codegen_aocl.cc |  6 ++++--
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/docs/deploy/aocl_fpga.md b/docs/deploy/aocl_fpga.md
index 71be23d93a4d..bd0dae97879d 100644
--- a/docs/deploy/aocl_fpga.md
+++ b/docs/deploy/aocl_fpga.md
@@ -12,7 +12,7 @@ We use two python scripts for this tutorial.
 import tvm
 
 tgt_host="llvm"
-tgt="aocl -device=de5net_a7 -mattr=emulator"
+tgt="aocl -device=s5_ref -mattr=emulator"
 
 n = tvm.var("n")
 A = tvm.placeholder((n,), name='A')
@@ -38,7 +38,7 @@ import tvm
 import numpy as np
 import os
 
-tgt="aocl -device=de5net_a7 -mattr=emulator"
+tgt="aocl -device=s5_ref -mattr=emulator"
 
 fadd = tvm.module.load("myadd.so")
 fadd_dev = tvm.module.load("myadd.aocx")
@@ -65,9 +65,9 @@ Setup
 ```
 /opt/intelFPGA/17.1/hld/linux64/lib/libalteracl.so
 ```
-- Create an FCD file for example at /opt/Intel/OpenCL/Boards/de5net.fcd so that your FPGA device can be found.
+- Create an FCD file for example at /opt/Intel/OpenCL/Boards/s5_ref.fcd so that your FPGA device can be found.
 ```
-/opt/intelFPGA/17.1/hld/board/terasic/linux64/lib/libterasic_apb_16_0_mmd.so
+/opt/intelFPGA/17.1/hld/board/s5_ref/linux64/lib/libaltera_s5_ref_mmd.so
 ```
 - Setup TVM with AOCL and OpenCL enabled.
 
@@ -83,7 +83,7 @@ python run.py
 ```
 
 - Run on FPGA devices (not tested)
-    - Change tgt value to "aocl -device=de5net_a7" on build.py and run.py
+    - Change tgt value to "aocl -device=s5_ref" in build.py and run.py
 ```
 unset CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA
 
diff --git a/src/codegen/codegen_aocl.cc b/src/codegen/codegen_aocl.cc
index 7b8fca239a72..8830588758ef 100644
--- a/src/codegen/codegen_aocl.cc
+++ b/src/codegen/codegen_aocl.cc
@@ -36,8 +36,10 @@ runtime::Module BuildAOCL(Array<LoweredFunc> funcs, std::string target_str) {
     LOG(FATAL) << "AOCL device name not specified in build target.";
   }
   std::string cmd = "aoc aocl.cl";
-  if (target_str.find("-mattr=emulator") != std::string::npos) {
-    cmd += " -march=emulator";
+  for (std::string option : target->options()) {
+    if (option == "-mattr=emulator") {
+      cmd += " -march=emulator";
+    }
   }
   cmd += " -board=" + target->device_name;
   if (system(cmd.c_str()) != 0) {

From f7317b0eb5b33f4fb99efd9fdb71f20ffb229d7f Mon Sep 17 00:00:00 2001
From: TABATA Keiichi <tabata.k.ichi@gmail.com>
Date: Sat, 28 Jul 2018 18:45:35 +0900
Subject: [PATCH 25/25] Added testcases.

---
 tests/python/integration/test_ewise_fpga.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/python/integration/test_ewise_fpga.py b/tests/python/integration/test_ewise_fpga.py
index c0a568faca2d..0abefff02778 100644
--- a/tests/python/integration/test_ewise_fpga.py
+++ b/tests/python/integration/test_ewise_fpga.py
@@ -3,6 +3,7 @@
 import os
 
 os.environ["XCL_EMULATION_MODE"] = "1"
+os.environ["CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA"] = "1"
 
 @tvm.register_func
 def tvm_callback_vhls_postproc(code):
@@ -43,6 +44,7 @@ def check_device(device, host="llvm"):
     if "AWS_PLATFORM" in os.environ:
         check_device("sdaccel -device=" + os.environ.get("AWS_PLATFORM"))
 
+    check_device("aocl -device=s5_ref -mattr=emulator")
 
 def test_multi_kernel():
     # graph
@@ -80,6 +82,7 @@ def check_device(device, host="llvm"):
             d.asnumpy(), a.asnumpy() * 2 + b.asnumpy(), rtol=1e-5)
 
     check_device("sdaccel")
+    check_device("aocl -device=s5_ref -mattr=emulator")
 
 
 if __name__ == "__main__":