crnn_layer

cnn๊ณผ rnn์„ ๊ฒฐํ•ฉํ•œ layer ์ž…๋‹ˆ๋‹ค.

rnn์—์„œ fully connected ์—ฐ์‚ฐ์„ convolutional ์—ฐ์‚ฐ์œผ๋กœ ๋ฐ”๋€Œ์–ด์ง„ ๊ฒƒ ์™ธ์— ๋”ฑํžˆ ๋ณ€ํ™”๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.

increment_layer

/*
 * Shift the per-step buffer pointers of *l by `steps` slots.
 *
 * One slot spans l->outputs * l->batch elements. The output, delta, x and
 * x_norm pointers stored in *l are all moved by the same element count; a
 * negative `steps` moves them backwards. Only the pointers held in *l are
 * modified -- the buffers they point into are not touched.
 */
static void increment_layer(layer *l, int steps)
{
    const int offset = l->outputs * l->batch * steps;

    l->x_norm += offset;
    l->x      += offset;
    l->delta  += offset;
    l->output += offset;
}

함수 이름: increment_layer

입력:

  • layer 포인터 l

  • int steps

동작:

  • l의 output, delta, x, x_norm 포인터를 steps * l->outputs * l->batch 만큼 증가시킴

설명:

  • 이 함수는 시퀀스를 스텝 단위로 처리하기 위해 필요한 함수 중 하나로, 각 레이어의 포인터를 스텝 수에 따라 적절히 이동시켜주는 역할을 합니다.

  • 이동시켜야 하는 양은 steps * l->outputs * l->batch 로 계산됩니다.

  • 이 함수를 사용하면 한 번에 이동할 스텝 수를 조절할 수 있으며, steps 가 음수이면 포인터가 이전 스텝 쪽으로 되돌아갑니다.

forward_crnn_layer

/*
 * Forward pass of the CRNN layer over l.steps consecutive time steps.
 *
 * l   - the CRNN layer; its input_layer, self_layer and output_layer
 *       sub-layers are copied by value so that advancing their buffer
 *       pointers here does not modify the stored sub-layers.
 * net - network state; net.input is consumed one step (l.inputs * l.batch
 *       elements) at a time, and net.train selects whether the state
 *       pointer advances to a fresh slot on every step.
 *
 * Before the loop, the delta buffers of all three sub-layers are zeroed
 * for every step and, when training, the first state slot is zeroed too.
 * Per step:
 *   1. run the input convolution on the current input slice;
 *   2. run the self convolution on the current state;
 *   3. build the new state as (old state if l.shortcut, else 0)
 *      + input convolution output + self convolution output;
 *   4. run the output convolution on the new state;
 *   5. advance the input cursor and all three sub-layer copies by one step.
 */
void forward_crnn_layer(layer l, network net)
{
    layer in_conv = *(l.input_layer);
    layer rec_conv = *(l.self_layer);
    layer out_conv = *(l.output_layer);
    const int state_len = l.hidden * l.batch;
    network step_net = net;   /* struct copy, so the train flag comes along */
    int t;

    fill_cpu(l.outputs * l.batch * l.steps, 0, out_conv.delta, 1);
    fill_cpu(state_len * l.steps, 0, rec_conv.delta, 1);
    fill_cpu(state_len * l.steps, 0, in_conv.delta, 1);
    if (net.train) {
        fill_cpu(state_len, 0, l.state, 1);
    }

    for (t = 0; t < l.steps; ++t) {
        float *prev_state;

        step_net.input = net.input;
        forward_convolutional_layer(in_conv, step_net);

        step_net.input = l.state;
        forward_convolutional_layer(rec_conv, step_net);

        /* When training, each step writes its state into the next slot;
         * otherwise the same slot is reused in place. */
        prev_state = l.state;
        if (net.train) {
            l.state += state_len;
        }

        if (!l.shortcut) {
            fill_cpu(state_len, 0, l.state, 1);
        } else {
            copy_cpu(state_len, prev_state, 1, l.state, 1);
        }
        axpy_cpu(state_len, 1, in_conv.output, 1, l.state, 1);
        axpy_cpu(state_len, 1, rec_conv.output, 1, l.state, 1);

        step_net.input = l.state;
        forward_convolutional_layer(out_conv, step_net);

        net.input += l.inputs * l.batch;
        increment_layer(&in_conv, 1);
        increment_layer(&rec_conv, 1);
        increment_layer(&out_conv, 1);
    }
}

ํ•จ์ˆ˜ ์ด๋ฆ„: forward_crnn_layer

์ž…๋ ฅ:

  • layer l: CRNN ๋ ˆ์ด์–ด

  • network net: ๋ ˆ์ด์–ด๊ฐ€ ์†ํ•œ ๋„คํŠธ์›Œํฌ

๋™์ž‘:

  • CRNN ๋ ˆ์ด์–ด์˜ forward ์—ฐ์‚ฐ์„ ์ˆ˜ํ–‰ํ•œ๋‹ค.

  • ์ž…๋ ฅ ๋ฐ์ดํ„ฐ๋ฅผ ํ•œ ์Šคํ…์”ฉ ์ฒ˜๋ฆฌํ•˜๋ฉฐ, ์ž…๋ ฅ ๋ ˆ์ด์–ด, self ๋ ˆ์ด์–ด, ์ถœ๋ ฅ ๋ ˆ์ด์–ด๋ฅผ ์ฐจ๋ก€๋Œ€๋กœ ๊ฑฐ์นœ๋‹ค.

  • ๊ฐ ์Šคํ…์—์„œ ์ž…๋ ฅ, self ๋ ˆ์ด์–ด์˜ ์ถœ๋ ฅ์„ ๋”ํ•˜์—ฌ state๋ฅผ ๊ตฌํ•˜๊ณ , ์ถœ๋ ฅ ๋ ˆ์ด์–ด๋ฅผ ๊ฑฐ์ณ ์ถœ๋ ฅ์„ ๊ณ„์‚ฐํ•œ๋‹ค.

  • ๊ฐ ์Šคํ…์—์„œ ์‚ฌ์šฉ๋œ ๋ ˆ์ด์–ด์˜ ์ธ๋ฑ์Šค๋ฅผ 1์”ฉ ์ฆ๊ฐ€์‹œํ‚จ๋‹ค.

์„ค๋ช…:

  • CRNN(Convolutional Recurrent Neural Network)์€ ์ปจ๋ณผ๋ฃจ์…˜ ๋ ˆ์ด์–ด์™€ ์ˆœํ™˜ ๋ ˆ์ด์–ด๊ฐ€ ๊ฒฐํ•ฉ๋œ ๊ตฌ์กฐ๋ฅผ ๊ฐ€์ง€๋Š” ๋”ฅ๋Ÿฌ๋‹ ๋ชจ๋ธ์ด๋‹ค.

  • ์ด ํ•จ์ˆ˜๋Š” CRNN ๋ ˆ์ด์–ด์˜ forward ์—ฐ์‚ฐ์„ ์ˆ˜ํ–‰ํ•˜๋Š” ํ•จ์ˆ˜์ด๋‹ค.

  • ์ž…๋ ฅ์œผ๋กœ๋Š” CRNN ๋ ˆ์ด์–ด์™€ ๋ ˆ์ด์–ด๊ฐ€ ์†ํ•œ ๋„คํŠธ์›Œํฌ๊ฐ€ ๋“ค์–ด์˜จ๋‹ค.

  • ํ•จ์ˆ˜ ๋‚ด๋ถ€์—์„œ๋Š” ์ž…๋ ฅ ๋ฐ์ดํ„ฐ๋ฅผ ํ•œ ์Šคํ…์”ฉ ์ฒ˜๋ฆฌํ•˜๋ฉฐ, ์ž…๋ ฅ ๋ ˆ์ด์–ด, self ๋ ˆ์ด์–ด, ์ถœ๋ ฅ ๋ ˆ์ด์–ด๋ฅผ ์ฐจ๋ก€๋Œ€๋กœ ๊ฑฐ์นœ๋‹ค.

  • ๊ฐ ์Šคํ…์—์„œ ์ž…๋ ฅ, self ๋ ˆ์ด์–ด์˜ ์ถœ๋ ฅ์„ ๋”ํ•˜์—ฌ state๋ฅผ ๊ตฌํ•˜๊ณ , ์ถœ๋ ฅ ๋ ˆ์ด์–ด๋ฅผ ๊ฑฐ์ณ ์ถœ๋ ฅ์„ ๊ณ„์‚ฐํ•œ๋‹ค.

  • ํ•จ์ˆ˜ ๋‚ด๋ถ€์—์„œ๋Š” ๊ฐ ์Šคํ…์—์„œ ์‚ฌ์šฉ๋œ ๋ ˆ์ด์–ด์˜ ์ธ๋ฑ์Šค๋ฅผ 1์”ฉ ์ฆ๊ฐ€์‹œํ‚จ๋‹ค.

backward_crnn_layer

/*
 * Backward pass of the CRNN layer, walking the time steps in reverse.
 *
 * l   - the CRNN layer; its three sub-layers are copied by value and the
 *       copies are positioned on the last step before the loop starts.
 * net - network state; net.input supplies each step's input slice and
 *       net.delta (when non-null) receives the gradient for that slice.
 *
 * Per step, from l.steps - 1 down to 0:
 *   1. rebuild the step's state as input-conv output + self-conv output
 *      and write it into the current state slot;
 *   2. backpropagate the output convolution, sending its input gradient
 *      into the self layer's delta for this step;
 *   3. step the state pointer back one slot and backpropagate the self
 *      convolution with that slot as input; its input gradient goes to
 *      the previous step's self delta (nowhere at step 0);
 *   4. copy this step's self delta into the input layer's delta and, when
 *      l.shortcut is set and a previous step exists, also accumulate it
 *      into the previous step's self delta;
 *   5. backpropagate the input convolution against this step's slice of
 *      net.input / net.delta;
 *   6. move all three sub-layer copies back by one step.
 */
void backward_crnn_layer(layer l, network net)
{
    layer in_conv = *(l.input_layer);
    layer rec_conv = *(l.self_layer);
    layer out_conv = *(l.output_layer);
    const int state_len = l.hidden * l.batch;
    const int last_step = l.steps - 1;
    network step_net = net;
    int t;

    increment_layer(&in_conv, last_step);
    increment_layer(&rec_conv, last_step);
    increment_layer(&out_conv, last_step);

    l.state += state_len * l.steps;
    for (t = last_step; t >= 0; --t) {
        const int input_offset = t * l.inputs * l.batch;

        copy_cpu(state_len, in_conv.output, 1, l.state, 1);
        axpy_cpu(state_len, 1, rec_conv.output, 1, l.state, 1);

        step_net.input = l.state;
        step_net.delta = rec_conv.delta;
        backward_convolutional_layer(out_conv, step_net);

        l.state -= state_len;

        /* Step 0 has no earlier step to receive the recurrent gradient. */
        step_net.input = l.state;
        step_net.delta = (t == 0) ? 0 : rec_conv.delta - state_len;
        backward_convolutional_layer(rec_conv, step_net);

        copy_cpu(state_len, rec_conv.delta, 1, in_conv.delta, 1);
        if (l.shortcut && t > 0) {
            axpy_cpu(state_len, 1, rec_conv.delta, 1, rec_conv.delta - state_len, 1);
        }

        step_net.input = net.input + input_offset;
        step_net.delta = net.delta ? net.delta + input_offset : 0;
        backward_convolutional_layer(in_conv, step_net);

        increment_layer(&in_conv, -1);
        increment_layer(&rec_conv, -1);
        increment_layer(&out_conv, -1);
    }
}

ํ•จ์ˆ˜ ์ด๋ฆ„: backward_crnn_layer

์ž…๋ ฅ:

  • layer l: ์—ญ์ „ํŒŒ๋ฅผ ์ˆ˜ํ–‰ํ•  CRNN ๋ ˆ์ด์–ด

  • network net: ๋ ˆ์ด์–ด๋ฅผ ํฌํ•จํ•˜๋Š” ๋„คํŠธ์›Œํฌ

๋™์ž‘:

  • CRNN ๋ ˆ์ด์–ด์˜ ์—ญ์ „ํŒŒ๋ฅผ ์ˆ˜ํ–‰ํ•ฉ๋‹ˆ๋‹ค.

  • ๋จผ์ €, ์ž…๋ ฅ ๋ ˆ์ด์–ด, self ๋ ˆ์ด์–ด, output ๋ ˆ์ด์–ด์— ๋Œ€ํ•œ ํฌ์ธํ„ฐ๋ฅผ ์ดˆ๊ธฐํ™”ํ•ฉ๋‹ˆ๋‹ค.

  • ๊ทธ๋Ÿฐ ๋‹ค์Œ, l.steps ๋ฒˆ ๋ฐ˜๋ณตํ•˜๋ฉด์„œ ๊ฐ ์Šคํ…์—์„œ ๋‹ค์Œ์„ ์ˆ˜ํ–‰ํ•ฉ๋‹ˆ๋‹ค.

  • ์ž…๋ ฅ ๋ ˆ์ด์–ด์™€ self ๋ ˆ์ด์–ด์˜ ์ถœ๋ ฅ ๊ฐ’์„ ํ•ฉ์ณ์„œ l.state์— ์ €์žฅํ•œ ํ›„, ์ถœ๋ ฅ ๋ ˆ์ด์–ด์˜ ์—ญ์ „ํŒŒ๋ฅผ ์ˆ˜ํ–‰ํ•ฉ๋‹ˆ๋‹ค.

  • ๊ทธ ํ›„, self ๋ ˆ์ด์–ด์˜ ์—ญ์ „ํŒŒ๋ฅผ ์ˆ˜ํ–‰ํ•˜๊ณ , ์ด์ „ ์Šคํ…์˜ self ๋ ˆ์ด์–ด ์—…๋ฐ์ดํŠธ ๋ธํƒ€๋ฅผ ํ˜„์žฌ ์Šคํ…์˜ ์ž…๋ ฅ ๋ ˆ์ด์–ด ์—…๋ฐ์ดํŠธ ๋ธํƒ€๋กœ ๋ณต์‚ฌํ•ฉ๋‹ˆ๋‹ค.

  • ๋งˆ์ง€๋ง‰์œผ๋กœ, ํ˜„์žฌ ์Šคํ…์˜ ์ž…๋ ฅ ๋ฐ์ดํ„ฐ์— ๋Œ€ํ•œ ์—ญ์ „ํŒŒ๋ฅผ ์ˆ˜ํ–‰ํ•ฉ๋‹ˆ๋‹ค.

์„ค๋ช…:

  • ์ด ํ•จ์ˆ˜๋Š” CRNN ๋ ˆ์ด์–ด์˜ ์—ญ์ „ํŒŒ๋ฅผ ์ˆ˜ํ–‰ํ•˜๋Š” ํ•จ์ˆ˜๋กœ, ๋„คํŠธ์›Œํฌ๊ฐ€ ํ•™์Šต ์ค‘์ธ ๊ฒฝ์šฐ์— ์‚ฌ์šฉ๋ฉ๋‹ˆ๋‹ค.

  • l์€ ์—ญ์ „ํŒŒ๋ฅผ ์ˆ˜ํ–‰ํ•  ๋ ˆ์ด์–ด๋ฅผ ๋‚˜ํƒ€๋‚ด๋Š” layer ๊ตฌ์กฐ์ฒด์ด๋ฉฐ, net์€ ๋ ˆ์ด์–ด๋ฅผ ํฌํ•จํ•˜๋Š” ๋„คํŠธ์›Œํฌ๋ฅผ ๋‚˜ํƒ€๋‚ด๋Š” network ๊ตฌ์กฐ์ฒด์ž…๋‹ˆ๋‹ค.

  • ์ด ํ•จ์ˆ˜๋Š” ๊ฐ ๋ ˆ์ด์–ด์˜ ์ถœ๋ ฅ ๊ฐ’์„ ๊ณ„์‚ฐํ•˜๊ณ  ๋ธํƒ€ ๊ฐ’์„ ์—…๋ฐ์ดํŠธํ•ฉ๋‹ˆ๋‹ค.

update_crnn_layer

/*
 * Apply one parameter update to the CRNN layer by forwarding the update
 * arguments `a` to each of its three convolutional sub-layers, in the
 * order input, self, output.
 */
void update_crnn_layer(layer l, update_args a)
{
    layer *sub_layers[] = { l.input_layer, l.self_layer, l.output_layer };
    const int count = (int)(sizeof sub_layers / sizeof sub_layers[0]);
    int k;

    for (k = 0; k < count; ++k) {
        update_convolutional_layer(*sub_layers[k], a);
    }
}

ํ•จ์ˆ˜ ์ด๋ฆ„: update_crnn_layer

์ž…๋ ฅ:

  • layer l: ์—…๋ฐ์ดํŠธํ•  CRNN ๋ ˆ์ด์–ด

  • update_args a: ์—…๋ฐ์ดํŠธ์— ์‚ฌ์šฉํ•  ์ธ์ž๋“ค (learning rate, momentum ๋“ฑ)

๋™์ž‘:

  • ์ฃผ์–ด์ง„ ์—…๋ฐ์ดํŠธ ์ธ์ž๋“ค์„ ์‚ฌ์šฉํ•˜์—ฌ ์ž…๋ ฅ์œผ๋กœ ์ฃผ์–ด์ง„ CRNN ๋ ˆ์ด์–ด์˜ input_layer, self_layer, output_layer๋ฅผ ๊ฐ๊ฐ ์—…๋ฐ์ดํŠธํ•˜๋Š” ํ•จ์ˆ˜์ž…๋‹ˆ๋‹ค.

  • update_convolutional_layer ํ•จ์ˆ˜๋ฅผ ํ˜ธ์ถœํ•˜์—ฌ ๊ฐ ๋ ˆ์ด์–ด๋ฅผ ์—…๋ฐ์ดํŠธํ•ฉ๋‹ˆ๋‹ค.

์„ค๋ช…:

  • CRNN ๋ ˆ์ด์–ด๋Š” ์ž…๋ ฅ ์‹œํ€€์Šค๋ฅผ ์ฒ˜๋ฆฌํ•˜๊ธฐ ์œ„ํ•œ ์ปจ๋ณผ๋ฃจ์…˜ ๋ ˆ์ด์–ด์™€ RNN ๋ ˆ์ด์–ด์˜ ๊ฒฐํ•ฉ์ž…๋‹ˆ๋‹ค.

  • ์ด ํ•จ์ˆ˜๋Š” ๊ทธ ์ค‘ ์ปจ๋ณผ๋ฃจ์…˜ ๋ ˆ์ด์–ด๋ฅผ ์—…๋ฐ์ดํŠธํ•˜๋Š” ํ•จ์ˆ˜์ž…๋‹ˆ๋‹ค.

  • ์ด ํ•จ์ˆ˜๋Š” ์ž…๋ ฅ์œผ๋กœ ๋ฐ›์€ update_args๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ๊ฐ ๋ ˆ์ด์–ด์˜ ํŒŒ๋ผ๋ฏธํ„ฐ๋ฅผ ์—…๋ฐ์ดํŠธํ•ฉ๋‹ˆ๋‹ค.

  • ๋จผ์ €, input_layer, self_layer, output_layer ๊ฐ๊ฐ์— ๋Œ€ํ•ด update_convolutional_layer ํ•จ์ˆ˜๋ฅผ ํ˜ธ์ถœํ•˜์—ฌ ๊ทธ ๋ ˆ์ด์–ด์˜ ํŒŒ๋ผ๋ฏธํ„ฐ๋ฅผ ์—…๋ฐ์ดํŠธํ•ฉ๋‹ˆ๋‹ค.

  • ์ด ํ•จ์ˆ˜๋Š” ์ปจ๋ณผ๋ฃจ์…˜ ๋ ˆ์ด์–ด์˜ ํŒŒ๋ผ๋ฏธํ„ฐ๋ฅผ ์—…๋ฐ์ดํŠธํ•˜๊ธฐ ์œ„ํ•ด ์‚ฌ์šฉ๋˜๋Š” ํ•จ์ˆ˜์ž…๋‹ˆ๋‹ค.

make_crnn_layer

/* Allocates storage for one sub-layer descriptor; exits the process on failure. */
static layer *alloc_crnn_sublayer(void)
{
    layer *sub = malloc(sizeof *sub);
    if (!sub) {
        fprintf(stderr, "CRNN Layer: out of memory allocating sub-layer\n");
        exit(EXIT_FAILURE);
    }
    return sub;
}

/*
 * Build and initialise a CRNN layer.
 *
 * batch           - total batch size including time steps; it is divided
 *                   by `steps` to obtain the per-step batch stored in the
 *                   layer.
 * h, w, c         - input height, width and channel count.
 * hidden_filters  - filter count of the input and self convolutions.
 * output_filters  - filter count of the output convolution.
 * steps           - number of time steps; must be positive.
 * activation      - activation passed to all three convolutions.
 * batch_normalize - batch-normalisation flag passed to all three
 *                   convolutions.
 *
 * Returns the populated layer by value. The layer owns the heap-allocated
 * state buffer and the three sub-layer descriptors.
 *
 * The process exits with EXIT_FAILURE if `steps` is not positive (the
 * per-step batch could not be computed) or if an allocation fails, rather
 * than continuing into a division by zero or a null dereference.
 */
layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize)
{
    fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters);
    if (steps <= 0) {
        fprintf(stderr, "CRNN Layer: steps must be positive, got %d\n", steps);
        exit(EXIT_FAILURE);
    }
    batch = batch / steps;
    layer l = {0};
    l.batch = batch;
    l.type = CRNN;
    l.steps = steps;
    l.h = h;
    l.w = w;
    l.c = c;
    l.out_h = h;
    l.out_w = w;
    l.out_c = output_filters;
    l.inputs = h*w*c;
    l.hidden = h * w * hidden_filters;
    l.outputs = l.out_h * l.out_w * l.out_c;

    /* One state slot per step plus one extra slot. The count is computed
     * in size_t so the product cannot overflow int. */
    const size_t state_count = (size_t)l.hidden * (size_t)batch * (size_t)(steps + 1);
    l.state = calloc(state_count, sizeof(float));
    if (!l.state && state_count != 0) {
        fprintf(stderr, "CRNN Layer: out of memory allocating state buffer\n");
        exit(EXIT_FAILURE);
    }

    /* Each sub-layer is created with batch*steps and then has its batch
     * field reset to the per-step batch -- presumably so its buffers span
     * every step while each call processes a single step (confirm against
     * make_convolutional_layer). The literal arguments 1, 3, 1, 1 and the
     * trailing zeros are passed through unchanged; their meaning is
     * defined by make_convolutional_layer. */
    l.input_layer = alloc_crnn_sublayer();
    fprintf(stderr, "\t\t");
    *(l.input_layer) = make_convolutional_layer(batch*steps, h, w, c, hidden_filters, 1, 3, 1, 1,  activation, batch_normalize, 0, 0, 0);
    l.input_layer->batch = batch;

    l.self_layer = alloc_crnn_sublayer();
    fprintf(stderr, "\t\t");
    *(l.self_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, hidden_filters, 1, 3, 1, 1,  activation, batch_normalize, 0, 0, 0);
    l.self_layer->batch = batch;

    l.output_layer = alloc_crnn_sublayer();
    fprintf(stderr, "\t\t");
    *(l.output_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, output_filters, 1, 3, 1, 1,  activation, batch_normalize, 0, 0, 0);
    l.output_layer->batch = batch;

    /* The CRNN layer exposes the output convolution's buffers as its own. */
    l.output = l.output_layer->output;
    l.delta = l.output_layer->delta;

    l.forward = forward_crnn_layer;
    l.backward = backward_crnn_layer;
    l.update = update_crnn_layer;

    return l;
}

함수 이름: make_crnn_layer

입력:

  • int batch: 배치 크기

  • int h: 입력 이미지 높이

  • int w: 입력 이미지 너비

  • int c: 입력 이미지 채널 수

  • int hidden_filters: 숨겨진 레이어에서 사용되는 필터 수

  • int output_filters: 출력 레이어에서 사용되는 필터 수

  • int steps: 시퀀스 길이 (스텝 수)

  • ACTIVATION activation: 활성화 함수 유형

  • int batch_normalize: 배치 정규화 여부

동작:

  • CRNN 레이어를 만들고 초기화합니다.

설명:

  • 이 함수는 입력 이미지의 높이, 너비, 채널 수 및 시퀀스 길이와 같은 인수를 사용하여 CRNN(Convolutional Recurrent Neural Network) 레이어를 만듭니다.

  • 이 레이어는 숨겨진 레이어와 출력 레이어 각각에 대해 3x3 커널과 같은 하이퍼파라미터를 사용한 2D 컨볼루션 레이어를 포함합니다.

  • 이 함수는 이러한 레이어를 만들고 초기화한 후 CRNN 레이어를 반환합니다.

Last updated

Was this helpful?