package com.shreyansh.stransfer.renderscript_neuralnet;

import android.content.Context;
import android.support.v8.renderscript.Allocation;
import android.support.v8.renderscript.Element;
import android.support.v8.renderscript.RenderScript;
import android.support.v8.renderscript.Type;
import android.util.Log;
import java.io.IOException;
import java.nio.FloatBuffer;

/* loaded from: classes.dex */
/**
 * Convolution layer that drives the {@code convolve2d} RenderScript script tile by tile.
 *
 * <p>{@link #process} copies the input into a zeroed, padded allocation, then for each tile runs
 * the script's {@code im2col} kernel, multiplies the weight allocation with the result through
 * {@code mBlas.SGEMM}, and copies the tile result into the output. The script's {@code addBeta}
 * kernel is run over the whole output at the end.
 *
 * <p>Weights and bias are read from the assets {@code <dir>/W} and {@code <dir>/b} by
 * {@link #loadModel}.
 */
public class Convolution2DTiled extends NeuralNetLayerBase {
    /** Tile height passed to the script as {@code tile_h}; also sizes the per-tile buffers. */
    private static final int TILE_Y = 64;

    /**
     * The weight-matrix row length ({@code in_channels * ksize * ksize}) is rounded up to a
     * multiple of this value. Presumably an alignment requirement of the BLAS kernel - TODO confirm.
     */
    private static final int ROW_LENGTH_MULTIPLE = 8;

    /** Numeric value of {@code ScriptIntrinsicBLAS.NO_TRANSPOSE} (assumes mBlas is that intrinsic). */
    private static final int BLAS_NO_TRANSPOSE = 111;

    /** Numeric value of {@code AssetManager.ACCESS_BUFFER}. */
    private static final int ASSET_ACCESS_BUFFER = 3;

    /** Host-side copy of the weights, {@code out_channels * in_channels * ksize * ksize} floats. */
    private final float[] W;
    /** Device-side weights: X = padded row length, Y = out_channels. */
    private final Allocation W_alloc;
    /** Host-side copy of the bias, one float per output channel. */
    private final float[] b;
    /** Device-side bias, bound to the script as {@code beta_alloc}. */
    private final Allocation b_alloc;
    private final int in_channels;
    private final int ksize;
    private final ScriptC_convolve2d mConvolve;
    /** Output height computed by the most recent {@link #process} call. */
    public int outH;
    /** Output width computed by the most recent {@link #process} call. */
    public int outW;
    private final int out_channels;
    private final int pad;
    /** {@code in_channels * ksize * ksize} rounded up to a multiple of {@link #ROW_LENGTH_MULTIPLE}. */
    private final int padded_Y_blas;
    private final int stride;

    /**
     * Allocates the weight/bias buffers and configures the script globals that do not depend on
     * the input size.
     *
     * @param context      passed to the base class
     * @param renderScript passed to the base class
     * @param inChannels   number of input channels
     * @param outChannels  number of output channels
     * @param kernelSize   kernel edge length, used for both kernel height and width
     * @param stride       step, used for both x and y
     * @param pad          padding, used for both height and width
     */
    public Convolution2DTiled(Context context, RenderScript renderScript, int inChannels, int outChannels, int kernelSize, int stride, int pad) {
        super(context, renderScript);
        this.in_channels = inChannels;
        this.out_channels = outChannels;
        this.ksize = kernelSize;
        this.stride = stride;
        this.pad = pad;

        final int rowLength = inChannels * kernelSize * kernelSize;
        this.W = new float[outChannels * rowLength];
        this.b = new float[outChannels];
        this.padded_Y_blas = (rowLength % ROW_LENGTH_MULTIPLE > 0)
                ? ((rowLength / ROW_LENGTH_MULTIPLE) + 1) * ROW_LENGTH_MULTIPLE
                : rowLength;

        this.W_alloc = newFloat2D(this.padded_Y_blas, outChannels);
        this.b_alloc = Allocation.createSized(this.mRS, Element.F32(this.mRS), outChannels);

        this.mConvolve = new ScriptC_convolve2d(this.mRS);
        this.mConvolve.set_kernel_h(kernelSize);
        this.mConvolve.set_kernel_w(kernelSize);
        this.mConvolve.set_step_x(stride);
        this.mConvolve.set_step_y(stride);
        this.mConvolve.set_pad_h(pad);
        this.mConvolve.set_pad_w(pad);
        this.mConvolve.set_beta_alloc(this.b_alloc);
        this.mConvolve.set_img_channel(inChannels);
        this.mConvolve.set_tile_h(TILE_Y);
    }

    /**
     * Reads the weights from {@code <modelDir>/W} and the bias from {@code <modelDir>/b} and
     * uploads both to their allocations.
     *
     * @param modelDir asset directory containing the {@code W} and {@code b} files
     * @throws IOException if an asset cannot be opened, read or closed
     */
    @Override // com.shreyansh.stransfer.renderscript_neuralnet.NeuralNetLayerBase
    public void loadModel(String modelDir) throws IOException {
        loadFloatsFromAsset(modelDir + "/W", this.W);
        final int rowLength = this.in_channels * this.ksize * this.ksize;
        if (this.padded_Y_blas == rowLength) {
            this.W_alloc.copyFrom(this.W);
        } else {
            // The host array is tightly packed, so stage it in an unpadded allocation and copy
            // it into the left part of each padded row.
            // NOTE(review): columns [rowLength, padded_Y_blas) of W_alloc are never written by
            // this class - confirm they are guaranteed to be zero.
            final Allocation staging = newFloat2D(rowLength, this.out_channels);
            try {
                staging.copyFrom(this.W);
                this.W_alloc.copy2DRangeFrom(0, 0, rowLength, this.out_channels, staging, 0, 0);
            } finally {
                // The staging allocation is only needed for this one copy.
                staging.destroy();
            }
        }
        loadFloatsFromAsset(modelDir + "/b", this.b);
        this.b_alloc.copyFrom(this.b);
        Log.v(NeuralNetLayerBase.TAG, "Convolution2D loaded: " + this.b[0]);
    }

    /**
     * Runs the layer on one input.
     *
     * <p>Updates {@link #outH} and {@link #outW} as a side effect and accumulates elapsed
     * milliseconds into {@code im2colTime}, {@code sgemmTime} and {@code betaTime}.
     *
     * @param input input allocation, bound to the script as {@code img_alloc}
     * @param imgH  input height
     * @param imgW  input width
     * @return a newly created allocation with X = {@code outH * outW} and Y = out_channels;
     *         this method does not destroy it on success
     */
    public Allocation process(Allocation input, int imgH, int imgW) {
        this.mConvolve.set_img_h(imgH);
        this.mConvolve.set_img_w(imgW);
        this.mConvolve.set_img_alloc(input);

        final int paddedH = imgH + (this.pad * 2);
        final int paddedW = imgW + (this.pad * 2);
        this.outH = ConvolveUtil.get_conv_outsize(imgH, this.ksize, this.stride, this.pad);
        this.outW = ConvolveUtil.get_conv_outsize(imgW, this.ksize, this.stride, this.pad);

        final Allocation output = newFloat2D(this.outH * this.outW, this.out_channels);
        Allocation padded = null;
        Allocation columns = null;
        Allocation tileResult = null;
        try {
            padded = newFloat2D(paddedH * paddedW, this.in_channels);
            this.mConvolve.forEach_zero(padded, padded);
            this.mConvolve.set_padded_alloc(padded);
            this.mConvolve.invoke_padd();

            final int tileOutH = ConvolveUtil.get_conv_outsize(TILE_Y, this.ksize, this.stride, this.pad);
            final int tileOutW = this.outW;
            Log.v(NeuralNetLayerBase.TAG, "tiled convolve size: " + tileOutH + " " + tileOutW);
            final int tileArea = tileOutH * tileOutW;
            columns = newFloat2D(tileArea, this.padded_Y_blas);
            tileResult = newFloat2D(tileArea, this.out_channels);
            this.mConvolve.set_outH(tileOutH);
            this.mConvolve.set_outW(tileOutW);

            // NOTE(review): integer division means a height that is not a multiple of TILE_Y is
            // covered only by its whole tiles, and a height below TILE_Y still runs one
            // full-size tile. This appears to leave output rows unwritten (or to overrun the
            // output in the small case) - confirm against the convolve2d script before relying
            // on such sizes.
            int tileCount = imgH / TILE_Y;
            if (tileCount == 0) {
                tileCount = 1;
            }
            for (int tile = 0; tile < tileCount; tile++) {
                this.mConvolve.set_tile_num(tile);

                final long im2colStart = System.currentTimeMillis();
                this.mConvolve.forEach_im2col(columns);
                this.mRS.finish();
                this.im2colTime += System.currentTimeMillis() - im2colStart;

                final long sgemmStart = System.currentTimeMillis();
                this.mBlas.SGEMM(BLAS_NO_TRANSPOSE, BLAS_NO_TRANSPOSE, 1.0f, this.W_alloc, columns, 0.0f, tileResult);
                this.mRS.finish();
                this.sgemmTime += System.currentTimeMillis() - sgemmStart;

                // Each tile occupies a contiguous run of tileArea columns in the output.
                output.copy2DRangeFrom(tile * tileArea, 0, tileArea, this.out_channels, tileResult, 0, 0);
            }
        } catch (RuntimeException e) {
            // The caller never receives the output on failure, so release it here.
            output.destroy();
            throw e;
        } finally {
            // Scratch buffers are released on both the success and the failure path.
            destroyIfCreated(padded);
            destroyIfCreated(columns);
            destroyIfCreated(tileResult);
        }

        final long betaStart = System.currentTimeMillis();
        this.mConvolve.forEach_addBeta(output, output);
        this.mRS.finish();
        this.betaTime += System.currentTimeMillis() - betaStart;
        return output;
    }

    /** Creates a 2D float allocation with the given X and Y dimensions. */
    private Allocation newFloat2D(int dimX, int dimY) {
        return Allocation.createTyped(this.mRS, Type.createXY(this.mRS, Element.F32(this.mRS), dimX, dimY));
    }

    /** Opens the asset, fills {@code dest} from it via {@code readInput}, and always closes it. */
    private void loadFloatsFromAsset(String assetPath, float[] dest) throws IOException {
        this.mInputStream = this.mContext.getAssets().open(assetPath, ASSET_ACCESS_BUFFER);
        try {
            FloatBuffer.wrap(dest).put(readInput(this.mInputStream).asFloatBuffer());
        } finally {
            this.mInputStream.close();
        }
    }

    /** Destroys the allocation unless it was never created. */
    private static void destroyIfCreated(Allocation allocation) {
        if (allocation != null) {
            allocation.destroy();
        }
    }
}
