代码样例
打开拷贝过来的算子样例文件 reduction.py。
详细的代码解释请参考如下注释。
#coding=utf-8
from functools import reduce

import te.lang.cce
from te import tvm
from topi import generic
from topi.cce import util


def _check_reduction_params(shape, dtype, axis, operation, coeff):
    """Validate the arguments of reduction(); raise RuntimeError on the first invalid one."""
    # Shape validation. check_shape_rule() is defined in
    # ddk/ddk/site-packages/topi-0.4.0.egg/topi/cce/util.py
    util.check_shape_rule(shape)

    check_list = ["float16", "float32"]
    if dtype.lower() not in check_list:
        raise RuntimeError("Reduction only support %s while dtype is %s" % (",".join(check_list), dtype))

    reduction_op = ("SUM", "ASUM", "SUMSQ", "MEAN")

    # axis validation. The exact type comparison is kept from the original
    # sample; unlike isinstance() it also rejects bool and int subclasses.
    if type(axis) is not int:
        raise RuntimeError("type of axis value should be int")
    if axis >= len(shape) or axis < -len(shape):
        raise RuntimeError(
            "input axis is out of range, axis value can be from %d to %d" % (-len(shape), len(shape) - 1))

    # operation validation
    if operation not in reduction_op:
        raise RuntimeError("op can only be one of SUM, ASUM, SUMSQ , MEAN")

    # coeff validation (exact int or float only, same as the original check)
    if type(coeff) is not int and type(coeff) is not float:
        raise RuntimeError("coeff must be a value")


def reduction(shape, dtype, axis, operation, coeff, kernel_name="Reduction", need_build=True, need_print=False):
    """
    Reduce a tensor from a given axis to the end, and scale the output with coeff.

    All dimensions from ``axis`` onward are flattened into a single dimension,
    which is then summed away.

    Parameters
    ----------
    shape : shape of data
    dtype : source data type, only support float16, float32 (case-insensitive)
    axis : the first axis to reduce, may be negative to index from the end
        (e.g., -1 for the last axis). If axis == 0, the reduction is performed
        across the entire input.
    operation : can only be one of
        "SUM, ASUM (sum of abs), SUMSQ (sum of sqr), MEAN"
    coeff : scale for output, must be an int or a float
    kernel_name : cce kernel name, default value is "Reduction"
    need_build : if need to build CCEC kernel, default value is True
    need_print : if need to print the ir, default value is False

    Returns
    -------
    None

    Raises
    ------
    RuntimeError
        If dtype, axis, operation or coeff fails validation.
        (util.check_shape_rule may raise as well for an invalid shape;
        its exception type is not visible from this file.)
    """
    # Basic parameter validation
    _check_reduction_params(shape, dtype, axis, operation, coeff)

    # Parameter preprocessing: normalise a negative axis, then collapse
    # shape[axis:] into one trailing dimension so a single sum reduces it.
    if axis < 0:
        axis = len(shape) + axis
    shape = list(shape)
    shape1 = shape[:axis] + [reduce(lambda x, y: x * y, shape[axis:])]
    inp_dtype = dtype.lower()

    # Define the input data tensor (a placeholder only)
    data = tvm.placeholder(shape1, name="data_input", dtype=inp_dtype)

    # Define the operator computation
    with tvm.target.cce():
        if operation == "ASUM":
            data_tmp_input = te.lang.cce.vabs(data)
            cof = coeff
            tmp = te.lang.cce.vmuls(data_tmp_input, cof)
        elif operation == "SUMSQ":
            data_tmp_input = te.lang.cce.vmul(data, data)
            cof = coeff
            tmp = te.lang.cce.vmuls(data_tmp_input, cof)
        elif operation == "MEAN":
            # The 1/size factor is split into two size**-0.5 factors, one
            # applied before the sum and one after it (presumably to keep
            # intermediate values small for float16 -- TODO confirm).
            size = shape1[-1]
            cof = float(coeff) * (size ** (-0.5))
            tmp = te.lang.cce.vmuls(data, cof)
        elif operation == "SUM":
            cof = coeff
            data_tmp_input = te.lang.cce.vmuls(data, cof)
            tmp = data_tmp_input

        # Sum along axis, dropping that dimension
        res_tmp = te.lang.cce.sum(tmp, axis=axis)

        # Convert the result to the input data type
        res = te.lang.cce.cast_to(res_tmp, inp_dtype, f1628IntegerFlag=True)

        if operation == "MEAN":
            # Second size**-0.5 factor; together with the first: coeff * sum / size.
            # NOTE(review): this replaces the cast_to result above, so the MEAN
            # output is not passed through cast_to -- kept as in the original.
            size = shape1[-1]
            sqrt_size = size ** (-0.5)
            res = te.lang.cce.vmuls(res_tmp, sqrt_size)

        # Generate the schedule object for the computation
        sch = generic.auto_schedule(res)

    # Define the build configuration
    config = {"print_ir": need_print,
              "need_build": need_build,
              "name": kernel_name,
              "tensor_list": [data, res]}

    # Build the operator and generate the target file
    te.lang.cce.cce_build_code(sch, config)


# Invoke the reduction operator with shape (2,3,4), data type float16, axis 1,
# operation SUM, coeff 2 and kernel name Reduction.
if __name__ == "__main__":
    reduction((2, 3, 4), "float16", 1, "SUM", coeff=2, kernel_name="Reduction")