logistic_layer

forward_logistic_layer

void forward_logistic_layer(const layer l, network net)
{
    // copy the raw input into the layer's output buffer
    copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1);
    // apply the logistic (sigmoid) activation in place
    activate_array(l.output, l.outputs*l.batch, LOGISTIC);
    // if ground-truth labels are available, compute loss and gradient
    if(net.truth){
        logistic_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss);
        l.cost[0] = sum_array(l.loss, l.batch*l.inputs);
    }
}

Function name: forward_logistic_layer

Inputs:

  β€’ const layer l: the layer struct (passed by value, not a pointer)

  β€’ network net: the network struct

Behavior:

  β€’ Computes the layer's output by applying the logistic function to the input. If net.truth is set, it also computes the logistic cross-entropy loss, updating l.loss and l.delta, and stores the sum of l.loss in l.cost[0].

Details:

  β€’ copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1): copies l.outputs*l.batch float values from net.input into l.output.

  β€’ activate_array(l.output, l.outputs*l.batch, LOGISTIC): applies the logistic activation Οƒ(x) = 1 / (1 + e^(-x)) to every value in the l.output array.

  β€’ if(net.truth): the loss branch runs only when ground-truth labels are available.

    β€’ logistic_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss): computes the logistic cross-entropy loss and its derivative, updating l.loss and l.delta.

    β€’ l.cost[0] = sum_array(l.loss, l.batch*l.inputs): sums every value in the l.loss array and stores the result in l.cost[0].

backward_logistic_layer

void backward_logistic_layer(const layer l, network net)
{
    // accumulate this layer's delta into the previous layer's delta:
    // net.delta += 1 * l.delta
    axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1);
}

Function name: backward_logistic_layer

Inputs:

  β€’ const layer l: the layer struct

  β€’ network net: the network struct

Behavior:

  β€’ Performs the backpropagation step of the logistic layer.

  β€’ Propagates the error gradient, already computed during the forward pass, to the previous layer of the network.

Details:

  β€’ This is the backward function of the logistic layer.

  β€’ It takes the layer struct representing the logistic layer and the network struct that owns that layer.

  β€’ Because l.delta was already filled by logistic_x_ent_cpu in the forward pass, this function only needs to accumulate it into net.delta, which it does with axpy_cpu (y += alpha * x, here with alpha = 1). A sketch of that helper follows below.

make_logistic_layer

layer make_logistic_layer(int batch, int inputs)
{
    fprintf(stderr, "logistic x entropy                             %4d\n",  inputs);
    layer l = {0};
    l.type = LOGXENT;
    l.batch = batch;
    l.inputs = inputs;
    l.outputs = inputs;   // element-wise layer: output size equals input size
    l.loss = calloc(inputs*batch, sizeof(float));
    l.output = calloc(inputs*batch, sizeof(float));
    l.delta = calloc(inputs*batch, sizeof(float));
    l.cost = calloc(1, sizeof(float));

    l.forward = forward_logistic_layer;
    l.backward = backward_logistic_layer;
    #ifdef GPU
    l.forward_gpu = forward_logistic_layer_gpu;
    l.backward_gpu = backward_logistic_layer_gpu;

    // device-side mirrors of the host buffers
    l.output_gpu = cuda_make_array(l.output, inputs*batch);
    l.loss_gpu = cuda_make_array(l.loss, inputs*batch);
    l.delta_gpu = cuda_make_array(l.delta, inputs*batch);
    #endif
    return l;
}

Function name: make_logistic_layer

Inputs:

  β€’ batch: batch size (int)

  β€’ inputs: size of the input data (int)

Behavior:

  β€’ Creates a layer that applies the logistic (sigmoid) activation together with the cross-entropy loss function.

  β€’ Stores the input size and batch size, and allocates zero-initialized buffers for the output, loss, delta, and cost.

  β€’ Registers the forward and backward functions (and, when compiled with GPU support, their GPU counterparts plus device-side copies of the buffers).

Details:

  β€’ Logistic regression is a classic classification algorithm, used here for binary classification of the input data.

  β€’ The cross-entropy loss measures the discrepancy between the predicted values and the ground-truth values.

  β€’ The input size is the model's input dimension, and the batch size is the number of samples processed at once. A usage sketch follows below.
