Skip to content
This repository has been archived by the owner on Jan 10, 2023. It is now read-only.

AVX512 support for Myelin #185

Merged
merged 2 commits into from
May 17, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added python/myelin/__init__.py
Empty file.
41 changes: 26 additions & 15 deletions sling/myelin/express.cc
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,13 @@ class RecipeParser {
}
next();

// Parse single qualifier (only used for testing).
bool single = false;
if (is('\'')) {
single = true;
next();
}

// Parse variable id.
int id = 0;
int digits = 0;
Expand All @@ -295,8 +302,9 @@ class RecipeParser {
if (digits == 0) Error("Variable id expected in expression");

// Return variable.
// type could be uninitialized at this point
return expr_->Variable(type, id);
Express::Var *var = expr_->Variable(type, id);
var->single = single;
return var;
}

// Output error.
Expand Down Expand Up @@ -674,16 +682,10 @@ bool Express::TryToEliminateOps() {
}

void Express::Hoist(int limit) {
// Collect all existing cached variables.
std::set<Var *> cached;
// Collect all existing hoisted variables.
std::set<Var *> hoisted;
for (int i = 0; i < body_; ++i) {
cached.insert(ops_[i]->result);
}

// Single element inputs and constants are also considered as cached since
// these are by definition loop invariant.
for (Var *var : vars_) {
if (var->type == NUMBER || var->single) cached.insert(var);
hoisted.insert(ops_[i]->result);
}

// Hoist const loads outside the body until limit reached.
Expand All @@ -693,7 +695,7 @@ void Express::Hoist(int limit) {
Var *candidate = nullptr;
for (Var *v : vars_) {
if (v->type == CONST || v->type == NUMBER) {
if (cached.count(v) == 0) {
if (hoisted.count(v) == 0) {
if (candidate == nullptr || v->usages() > candidate->usages()) {
candidate = v;
}
Expand All @@ -720,11 +722,20 @@ void Express::Hoist(int limit) {
assign->Assign(temp);
assign->AddArgument(candidate);
body_++;
cached.insert(candidate);
hoisted.insert(candidate);
hoisted.insert(temp);
new_temps++;
}
if (new_temps > 0) CompactTempVars();

// Single element inputs and constants are also considered hoisted since
// these are by definition loop invariant.
for (Var *var : vars_) {
if (var->type == NUMBER || var->type == CONST || var->single) {
hoisted.insert(var);
}
}

// Hoist loop-invariant operations.
bool again = true;
while (again) {
Expand All @@ -735,7 +746,7 @@ void Express::Hoist(int limit) {
// Check if all arguments are already hoisted (i.e. loop invariant).
bool invariant = true;
for (Var *arg : op->args) {
if (cached.count(arg) == 0) {
if (hoisted.count(arg) == 0) {
invariant = false;
break;
}
Expand All @@ -745,7 +756,7 @@ void Express::Hoist(int limit) {
if (invariant) {
for (int j = i; j > body_; --j) ops_[j] = ops_[j - 1];
ops_[body_++] = op;
cached.insert(op->result);
hoisted.insert(op->result);
again = true;
break;
}
Expand Down
1 change: 1 addition & 0 deletions sling/myelin/generator/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ cc_library(
"vector-flt-sse.cc",
"vector-flt-avx128.cc",
"vector-flt-avx256.cc",
"vector-flt-avx512.cc",
"scalar-int.cc",
"vector-int-sse.cc",
"vector-int-avx128.cc",
Expand Down
7 changes: 7 additions & 0 deletions sling/myelin/generator/elementwise.cc
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,13 @@ ElementwiseIndexGenerator::Locator *ElementwiseIndexGenerator::GetLocator(
<< " input: " << var->shape().ToString()
<< " output: " << shape_.ToString();
}
} else if (var->shape().outer(shape_.rank()) == 1) {
// The variable shape prefix is a one vector so use a simple iterator.
loc->iterator = GetIterator(SIMPLE, var->elements());
} else {
LOG(FATAL) << "Unsupported iterator: " << var->name() << " with shape "
<< var->shape().ToString()
<< " to output shape " << shape_.ToString();
}

return loc;
Expand Down
209 changes: 207 additions & 2 deletions sling/myelin/generator/expression.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ ExpressionGenerator *CreateVectorFltSSEGenerator();
ExpressionGenerator *CreateScalarFltAVXGenerator();
ExpressionGenerator *CreateVectorFltAVX128Generator();
ExpressionGenerator *CreateVectorFltAVX256Generator();
ExpressionGenerator *CreateVectorFltAVX512Generator();
ExpressionGenerator *CreateScalarIntGenerator();
ExpressionGenerator *CreateVectorIntSSEGenerator();
ExpressionGenerator *CreateVectorIntAVX128Generator();
Expand All @@ -41,6 +42,7 @@ void ExpressionGenerator::Initialize(const Express &expression,
expression_.Copy(expression);
type_ = type;
index_ = index;
index_->set_extended_regs(ExtendedRegs());

// Optimize expression.
bool fma = model_.fm_reg_reg_reg;
Expand Down Expand Up @@ -83,7 +85,17 @@ ExpressionGenerator *ExpressionGenerator::Select(const Express &expr,
ExpressionGenerator *generator = nullptr;
switch (type) {
case DT_FLOAT:
if (CPU::Enabled(AVX)) {
if (CPU::Enabled(AVX512F)) {
if (IsVector(size, 16)) {
generator = CreateVectorFltAVX512Generator();
} else if (IsVector(size, 8)) {
generator = CreateVectorFltAVX256Generator();
} else if (IsVector(size, 4)) {
generator = CreateVectorFltAVX128Generator();
} else {
generator = CreateScalarFltAVXGenerator();
}
} else if (CPU::Enabled(AVX)) {
if (IsVector(size, 8)) {
generator = CreateVectorFltAVX256Generator();
} else if (IsVector(size, 4)) {
Expand All @@ -101,7 +113,17 @@ ExpressionGenerator *ExpressionGenerator::Select(const Express &expr,
break;

case DT_DOUBLE:
if (CPU::Enabled(AVX)) {
if (CPU::Enabled(AVX512F)) {
if (IsVector(size, 8)) {
generator = CreateVectorFltAVX512Generator();
} else if (IsVector(size, 4)) {
generator = CreateVectorFltAVX256Generator();
} else if (IsVector(size, 2)) {
generator = CreateVectorFltAVX128Generator();
} else {
generator = CreateScalarFltAVXGenerator();
}
} else if (CPU::Enabled(AVX)) {
if (IsVector(size, 4)) {
generator = CreateVectorFltAVX256Generator();
} else if (IsVector(size, 2)) {
Expand Down Expand Up @@ -399,6 +421,66 @@ void ExpressionGenerator::GenerateYMMVectorMove(
}
}

// Emits a move from the memory operand `src` into the ZMM register `dst`.
// The instruction is picked from the generator's element type: vmovaps for
// DT_FLOAT and vmovapd for DT_DOUBLE. Any other type is reported through
// UNSUPPORTED.
void ExpressionGenerator::GenerateZMMMoveMemToReg(
    ZMMRegister dst,
    const Operand &src,
    MacroAssembler *masm) {
  if (type_ == DT_FLOAT) {
    // Single-precision elements.
    __ vmovaps(dst, src);
  } else if (type_ == DT_DOUBLE) {
    // Double-precision elements.
    __ vmovapd(dst, src);
  } else {
    // No ZMM move is defined here for other element types.
    UNSUPPORTED;
  }
}

// Emits the ZMM move for `instr`. The form is chosen from which of
// instr->dst / instr->src hold a register number (a value of -1 selects the
// memory form for that side):
//   dst and src set  -> register-to-register move
//   only dst set     -> load from args[0], broadcasting when the index
//                       generator reports that the argument needs it
//   only src set     -> store to the result's address
// Element types other than DT_FLOAT/DT_DOUBLE, and the case where neither
// side is a register, are reported through UNSUPPORTED.
void ExpressionGenerator::GenerateZMMVectorMove(
    Express::Op *instr,
    MacroAssembler *masm) {
  const bool dst_in_reg = instr->dst != -1;
  const bool src_in_reg = instr->src != -1;
  const bool is_float = type_ == DT_FLOAT;
  const bool is_double = type_ == DT_DOUBLE;

  if (dst_in_reg && src_in_reg) {
    // MOV reg,reg
    if (is_float) {
      __ vmovaps(zmm(instr->dst), zmm(instr->src));
    } else if (is_double) {
      __ vmovapd(zmm(instr->dst), zmm(instr->src));
    } else {
      UNSUPPORTED;
    }
  } else if (dst_in_reg) {
    // MOV reg,[mem]
    if (!index_->NeedsBroadcast(instr->args[0])) {
      // Plain load of the argument.
      GenerateZMMMoveMemToReg(zmm(instr->dst), addr(instr->args[0]), masm);
    } else if (is_float) {
      // Broadcast load of a single-precision argument.
      __ vbroadcastss(zmm(instr->dst), addr(instr->args[0]));
    } else if (is_double) {
      // Broadcast load of a double-precision argument.
      __ vbroadcastsd(zmm(instr->dst), addr(instr->args[0]));
    } else {
      UNSUPPORTED;
    }
  } else if (src_in_reg) {
    // MOV [mem],reg
    if (is_float) {
      __ vmovaps(addr(instr->result), zmm(instr->src));
    } else if (is_double) {
      __ vmovapd(addr(instr->result), zmm(instr->src));
    } else {
      UNSUPPORTED;
    }
  } else {
    // Memory-to-memory moves are not generated here.
    UNSUPPORTED;
  }
}

void ExpressionGenerator::GenerateIntMoveMemToReg(
Register dst, const Operand &src,
MacroAssembler *masm) {
Expand Down Expand Up @@ -858,6 +940,129 @@ void ExpressionGenerator::GenerateYMMFltOp(
}
}

// Emits a two-operand ZMM floating-point operation for `instr`.
//
// The float (flt*) or double (dbl*) emitters are selected from the
// generator's element type. For the register form, the plain emitter
// (fltopreg/dblopreg) is used when it is non-null; otherwise the variant
// taking an extra `noround` argument (fltopregr/dblopregr) is used. For the
// memory form, the operand is the address of instr->args[argnum]. All
// emitters are called with `nomask`.
//
// Unsupported element types and operand layouts (no destination register)
// are reported through UNSUPPORTED.
void ExpressionGenerator::GenerateZMMFltOp(
    Express::Op *instr,
    OpZMMRegReg fltopreg, OpZMMRegReg dblopreg,
    OpZMMRegRegR fltopregr, OpZMMRegRegR dblopregr,
    OpZMMRegMem fltopmem, OpZMMRegMem dblopmem,
    MacroAssembler *masm, int argnum) {
  const bool is_float = type_ == DT_FLOAT;
  const bool is_double = type_ == DT_DOUBLE;
  const bool dst_in_reg = instr->dst != -1;
  const bool src_in_reg = instr->src != -1;

  if (dst_in_reg && src_in_reg) {
    // OP reg,reg
    if (!is_float && !is_double) {
      UNSUPPORTED;
    } else {
      const OpZMMRegReg plain = is_float ? fltopreg : dblopreg;
      if (plain != nullptr) {
        (masm->*plain)(zmm(instr->dst), zmm(instr->src), nomask);
      } else {
        // Fall back to the emitter that also takes a rounding argument.
        const OpZMMRegRegR rounding = is_float ? fltopregr : dblopregr;
        (masm->*rounding)(zmm(instr->dst), zmm(instr->src), nomask, noround);
      }
    }
  } else if (dst_in_reg) {
    // OP reg,[mem]
    if (!is_float && !is_double) {
      UNSUPPORTED;
    } else {
      const OpZMMRegMem memop = is_float ? fltopmem : dblopmem;
      (masm->*memop)(zmm(instr->dst), addr(instr->args[argnum]), nomask);
    }
  } else {
    UNSUPPORTED;
  }
}

// Emits a ZMM floating-point operation that carries an 8-bit immediate.
//
// The float (flt*) or double (dbl*) emitters are selected from the
// generator's element type. When instr->src holds a register the register
// emitter is used; when it is -1 the memory emitter is used with the address
// of instr->args[argnum]. `imm` is forwarded unchanged and all emitters are
// called with `nomask`.
//
// Unsupported element types and operand layouts (no destination register)
// are reported through UNSUPPORTED.
void ExpressionGenerator::GenerateZMMFltOp(
    Express::Op *instr,
    OpZMMRegRegImm fltopreg, OpZMMRegRegImm dblopreg,
    OpZMMRegMemImm fltopmem, OpZMMRegMemImm dblopmem,
    int8 imm,
    MacroAssembler *masm, int argnum) {
  const bool is_float = type_ == DT_FLOAT;
  const bool is_double = type_ == DT_DOUBLE;
  const bool dst_in_reg = instr->dst != -1;
  const bool src_in_reg = instr->src != -1;

  if (dst_in_reg && src_in_reg) {
    // OP reg,reg,imm
    if (is_float) {
      (masm->*fltopreg)(zmm(instr->dst), zmm(instr->src), imm, nomask);
    } else if (is_double) {
      (masm->*dblopreg)(zmm(instr->dst), zmm(instr->src), imm, nomask);
    } else {
      UNSUPPORTED;
    }
  } else if (dst_in_reg) {
    // OP reg,[mem],imm
    if (is_float) {
      (masm->*fltopmem)(zmm(instr->dst), addr(instr->args[argnum]), imm,
                        nomask);
    } else if (is_double) {
      (masm->*dblopmem)(zmm(instr->dst), addr(instr->args[argnum]), imm,
                        nomask);
    } else {
      UNSUPPORTED;
    }
  } else {
    UNSUPPORTED;
  }
}

// Emits a three-operand ZMM floating-point operation for `instr`.
//
// The float (flt*) or double (dbl*) emitters are selected from the
// generator's element type. Both instr->dst and instr->src must hold
// registers. If instr->src2 is also a register, the plain register emitter
// (fltopreg/dblopreg) is used when non-null, otherwise the variant taking an
// extra `noround` argument (fltopregr/dblopregr). If instr->src2 is -1 the
// last operand is the address of instr->args[argnum]. All emitters are
// called with `nomask`.
//
// Unsupported element types and operand layouts are reported through
// UNSUPPORTED.
void ExpressionGenerator::GenerateZMMFltOp(
    Express::Op *instr,
    OpZMMRegRegReg fltopreg, OpZMMRegRegReg dblopreg,
    OpZMMRegRegRegR fltopregr, OpZMMRegRegRegR dblopregr,
    OpZMMRegRegMem fltopmem, OpZMMRegRegMem dblopmem,
    MacroAssembler *masm, int argnum) {
  const bool is_float = type_ == DT_FLOAT;
  const bool is_double = type_ == DT_DOUBLE;
  const bool regs_ready = instr->dst != -1 && instr->src != -1;
  const bool src2_in_reg = instr->src2 != -1;

  if (regs_ready && src2_in_reg) {
    // OP reg,reg,reg
    if (!is_float && !is_double) {
      UNSUPPORTED;
    } else {
      const OpZMMRegRegReg plain = is_float ? fltopreg : dblopreg;
      if (plain != nullptr) {
        (masm->*plain)(zmm(instr->dst), zmm(instr->src), zmm(instr->src2),
                       nomask);
      } else {
        // Fall back to the emitter that also takes a rounding argument.
        const OpZMMRegRegRegR rounding = is_float ? fltopregr : dblopregr;
        (masm->*rounding)(zmm(instr->dst), zmm(instr->src), zmm(instr->src2),
                          nomask, noround);
      }
    }
  } else if (regs_ready) {
    // OP reg,reg,[mem]
    if (!is_float && !is_double) {
      UNSUPPORTED;
    } else {
      const OpZMMRegRegMem memop = is_float ? fltopmem : dblopmem;
      (masm->*memop)(zmm(instr->dst), zmm(instr->src),
                     addr(instr->args[argnum]), nomask);
    }
  } else {
    UNSUPPORTED;
  }
}

void ExpressionGenerator::GenerateIntUnaryOp(
Express::Op *instr,
OpReg opregb, OpMem opmemb,
Expand Down
Loading