In deep learning theory, even a network with a single hidden layer can approximate any continuous function to arbitrary accuracy, given enough hidden units (the universal approximation theorem). To verify this, I wrote the TensorFlow example below:

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# Fit y = x on the integers 0..9 with one wide ReLU layer whose inputs are
# tanh(x) and x**2. Written against the TensorFlow 1.x graph/session API.
import tensorflow as tf

hidden_nodes = 1024


def weight_variable(shape):
    """Return a trainable variable of the given shape.

    Initial values are drawn from a truncated normal distribution with
    mean 1.0 and standard deviation 1.0.
    """
    initial = tf.truncated_normal(shape, stddev=1.0, mean=1.0)
    return tf.Variable(initial)


def bias_variable(shape):
    """Return a trainable variable of the given shape, filled with 0.01."""
    initial = tf.constant(0.01, shape=shape)
    return tf.Variable(initial)


with tf.device('/cpu:0'):
    x = tf.placeholder(tf.float32)
    y = tf.placeholder(tf.float32)

    # Two hand-made features of x, each reshaped to a single row and then
    # stacked along axis 0 into a [2, N] matrix.
    a = tf.reshape(tf.tanh(x), [1, -1])
    b = tf.reshape(tf.square(x), [1, -1])
    basic = tf.concat([a, b], 0)

    with tf.name_scope('fc1'):
        W_fc1 = weight_variable([hidden_nodes, 2])
        b_fc1 = bias_variable([1])

    # [hidden_nodes, 2] x [2, N] -> [hidden_nodes, N]; the single bias value
    # is broadcast over every entry before the ReLU.
    linear_model = tf.nn.relu(tf.matmul(W_fc1, basic) + b_fc1)

    # loss: sum of absolute errors (L1), not a sum of squares.
    # NOTE(review): there is no output layer, so `y` is broadcast against the
    # activations of all `hidden_nodes` units and every unit is pushed towards
    # y on its own -- confirm this is the intended model.
    loss = tf.reduce_sum(tf.abs(linear_model - y))

    # optimizer
    # NOTE(review): the surrounding text reports runs at 1e-3 and 1e-5, while
    # this listing uses 1e-4.
    optimizer = tf.train.GradientDescentOptimizer(1e-4)
    train = optimizer.minimize(loss)

# training data: the identity mapping on 0..9
x_train = range(0, 10)
y_train = range(0, 10)
feed = {x: x_train, y: y_train}

init = tf.global_variables_initializer()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

# The context manager closes the session (and frees its resources) even if a
# training step raises.
with tf.Session(config=config) as sess:
    sess.run(init)  # initialize all variables
    for _ in range(3000):
        sess.run(train, feed)

    # Only the final training loss is reported, so it is the only tensor fetched.
    curr_loss = sess.run(loss, feed)

print("loss: %s" % (curr_loss))

This code tries to regress a number from two features computed from it: its tanh value and its square.

On the first run, the loss didn’t change at all. After I changed the learning rate from 1e-3 to 1e-5, the loss slowly went down as expected. I think this is why some people call deep learning the “black magic” of machine learning.