tensorflow 笔记

实现加法:

1
2
3
4
5
6
7
#coding:utf-8

import tensorflow as tf #引入模块
a = tf.constant([1.0, 2.0]) #定义一个张量等于[1.0,2.0]
b = tf.constant([3.0, 4.0]) #定义一个张量等于[3.0,4.0]
result = a+b #实现 a 加 b 的加法
print (result) #打印出结果

输出结果:

1
2
C:\Users\wangxiuwen\Desktop\tensor>python test.py
Tensor("add:0", shape=(2,), dtype=float32)

实现矩阵乘法:

1
2
3
4
5
6
7
import tensorflow as tf #引入模块
x = tf.constant([[1.0, 2.0]]) #定义一个 2 阶张量等于[[1.0,2.0]]
w = tf.constant([[3.0], [4.0]]) #定义一个 2 阶张量等于[[3.0],[4.0]]
y = tf.matmul(x, w) #实现 xw 矩阵乘法, 根据矩阵乘法的定义,要求矩阵A的列数和矩阵B的行数一致

with tf.Session() as sess:
print (sess.run(y))

构建神经网络:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#coding:utf-8

import tensorflow as tf

with tf.device('/gpu:0'):
x=tf.placeholder(tf.float32,shape=(None,2))
w1=tf.Variable(tf.random_normal([2,3],stddev=1,seed=1))
w2=tf.Variable(tf.random_normal([3,1],stddev=1,seed=1))

a=tf.matmul(x,w1)
y=tf.matmul(a,w2)

with tf.Session() as sess:
init_op=tf.global_variables_initializer()
sess.run(init_op)
print("y is :\n", sess.run(y,
feed_dict={
x:[
[0.7, 0.5],
[0.2, 0.3],
[0.3, 0.4],
[0.4, 0.5]
]
})
)

反向传播:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#coding:utf-8
#0导入模块,生成模拟数据集。
import tensorflow as tf
import numpy as np
import datetime

BATCH_SIZE = 8
SEED = 23455

#基于seed产生随机数
rdm = np.random.RandomState(SEED)
#随机数返回32行2列的矩阵 表示32组 体积和重量 作为输入数据集
X = rdm.rand(32,2)

#从X这个32行2列的矩阵中 取出一行 判断如果和小于1 给Y赋值1 如果和不小于1 给Y赋值0
#作为输入数据集的标签(正确答案)
Y_ = [[int(x0 + x1 < 1)] for (x0, x1) in X]

#1定义神经网络的输入、参数和输出,定义前向传播过程。
x = tf.placeholder(tf.float32, shape=(None, 2))
y_= tf.placeholder(tf.float32, shape=(None, 1))

w1= tf.Variable(tf.random_normal([2, 3], stddev=1, seed=1))
w2= tf.Variable(tf.random_normal([3, 1], stddev=1, seed=1))

a = tf.matmul(x, w1)
y = tf.matmul(a, w2)

#2定义损失函数及反向传播方法。
loss_mse = tf.reduce_mean(tf.square(y-y_))
train_step = tf.train.GradientDescentOptimizer(0.001).minimize(loss_mse)
#train_step = tf.train.MomentumOptimizer(0.001,0.9).minimize(loss_mse)
#train_step = tf.train.AdamOptimizer(0.001).minimize(loss_mse)

#3生成会话,训练STEPS轮
with tf.Session() as sess:
starttime = datetime.datetime.now()
init_op = tf.global_variables_initializer()
sess.run(init_op)
# 输出目前(未经训练)的参数取值。
print ("w1:\n", sess.run(w1))
print ("w2:\n", sess.run(w2))
print ("\n")

# 训练模型。
STEPS = 20001
for i in range(STEPS):
start = (i*BATCH_SIZE) % 32
end = start + BATCH_SIZE
sess.run(train_step, feed_dict={x: X[start:end], y_: Y_[start:end]})
if i % 2000 == 0:
total_loss = sess.run(loss_mse, feed_dict={x: X, y_: Y_})
print("After %d training step(s), loss_mse on all data is %g" % (i, total_loss))
# 输出训练后的参数取值。
print ("\n")
print ("w1:\n", sess.run(w1))
print ("w2:\n", sess.run(w2))
endtime = datetime.datetime.now()
interval=(endtime - starttime).seconds
print("interval: ", interval)

输出结果:

1
2
3
4
5
6
7
8
w1:
[[-0.69053286 0.8059843 0.0969111 ]
[-2.3437088 -0.10367863 0.58519495]]
w2:
[[-0.08949634]
[ 0.8059557 ]
[-0.05076383]]
After 18000 training step(s), loss_mse on all data is 0.383561

自定义损失函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40

#coding:utf-8
#酸奶成本9元, 酸奶利润1元
#预测多了损失大,故不要预测多,故生成的模型会少预测一些
#0导入模块,生成数据集
import tensorflow as tf
import numpy as np
BATCH_SIZE = 8
SEED = 23455
COST = 9
PROFIT = 1

rdm = np.random.RandomState(SEED)
X = rdm.rand(32,2)
Y = [[x1+x2+(rdm.rand()/10.0-0.05)] for (x1, x2) in X]

#1定义神经网络的输入、参数和输出,定义前向传播过程。
x = tf.placeholder(tf.float32, shape=(None, 2))
y_ = tf.placeholder(tf.float32, shape=(None, 1))
w1= tf.Variable(tf.random_normal([2, 1], stddev=1, seed=1))
y = tf.matmul(x, w1)

#2定义损失函数及反向传播方法。
#重新定义损失函数,使得预测多了的损失大,于是模型应该偏向少的方向预测。
loss = tf.reduce_sum(tf.where(tf.greater(y, y_), (y - y_)*COST, (y_ - y)*PROFIT))
train_step = tf.train.GradientDescentOptimizer(0.001).minimize(loss)

#3生成会话,训练STEPS轮。
with tf.Session() as sess:
init_op = tf.global_variables_initializer()
sess.run(init_op)
STEPS = 3000
for i in range(STEPS):
start = (i*BATCH_SIZE) % 32
end = (i*BATCH_SIZE) % 32 + BATCH_SIZE
sess.run(train_step, feed_dict={x: X[start:end], y_: Y[start:end]})
if i % 500 == 0:
print "After %d training steps, w1 is: " % (i)
print sess.run(w1), "\n"
print "Final w1 is: \n", sess.run(w1)

可衰减的学习率:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#coding:utf-8
#设损失函数 loss=(w+1)^2, 令w初值是常数10。反向传播就是求最优w,即求最小loss对应的w值
#使用指数衰减的学习率,在迭代初期得到较高的下降速度,可以在较小的训练轮数下取得更有收敛度。
import tensorflow as tf

LEARNING_RATE_BASE = 0.1 #最初学习率
LEARNING_RATE_DECAY = 0.99 #学习率衰减率
LEARNING_RATE_STEP = 1 #喂入多少轮BATCH_SIZE后,更新一次学习率,一般设为:总样本数/BATCH_SIZE

#运行了几轮BATCH_SIZE的计数器,初值给0, 设为不被训练
global_step = tf.Variable(0, trainable=False)
#定义指数下降学习率
learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step, LEARNING_RATE_STEP, LEARNING_RATE_DECAY, staircase=True)
#定义待优化参数,初值给10
w = tf.Variable(tf.constant(5, dtype=tf.float32))
#定义损失函数loss
loss = tf.square(w+1)
#定义反向传播方法
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
#生成会话,训练40轮
with tf.Session() as sess:
init_op=tf.global_variables_initializer()
sess.run(init_op)
for i in range(40):
sess.run(train_step)
learning_rate_val = sess.run(learning_rate)
global_step_val = sess.run(global_step)
w_val = sess.run(w)
loss_val = sess.run(loss)
print ("After %s steps: global_step is %f, w is %f, learning rate is %f, loss is %f" % (i, global_step_val, w_val, learning_rate_val, loss_val))

滑动平均(影子):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#coding:utf-8
import tensorflow as tf

#1. 定义变量及滑动平均类
#定义一个32位浮点变量,初始值为0.0 这个代码就是不断更新w1参数,优化w1参数,滑动平均做了个w1的影子
w1 = tf.Variable(0, dtype=tf.float32)
#定义num_updates(NN的迭代轮数),初始值为0,不可被优化(训练),这个参数不训练
global_step = tf.Variable(0, trainable=False)
#实例化滑动平均类,给衰减率为0.99,当前轮数global_step
MOVING_AVERAGE_DECAY = 0.99
ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
#ema.apply后的括号里是更新列表,每次运行sess.run(ema_op)时,对更新列表中的元素求滑动平均值。
#在实际应用中会使用tf.trainable_variables()自动将所有待训练的参数汇总为列表
#ema_op = ema.apply([w1])
ema_op = ema.apply(tf.trainable_variables())

#2. 查看不同迭代中变量取值的变化。
with tf.Session() as sess:
# 初始化
init_op = tf.global_variables_initializer()
sess.run(init_op)
#用ema.average(w1)获取w1滑动平均值 (要运行多个节点,作为列表中的元素列出,写在sess.run中)
#打印出当前参数w1和w1滑动平均值
print( "current global_step:", sess.run(global_step))
print( "current w1", sess.run([w1, ema.average(w1)]))

# 参数w1的值赋为1
sess.run(tf.assign(w1, 1))
sess.run(ema_op)
print( "current global_step:", sess.run(global_step))
print( "current w1", sess.run([w1, ema.average(w1)]))

# 更新global_step和w1的值,模拟出轮数为100时,参数w1变为10, 以下代码global_step保持为100,每次执行滑动平均操作,影子值会更新
sess.run(tf.assign(global_step, 100))
sess.run(tf.assign(w1, 10))
sess.run(ema_op)
print( "current global_step:", sess.run(global_step))
print( "current w1:", sess.run([w1, ema.average(w1)]))

# 每次sess.run会更新一次w1的滑动平均值
sess.run(ema_op)
print( "current global_step:" , sess.run(global_step))
print( "current w1:", sess.run([w1, ema.average(w1)]))

sess.run(ema_op)
print( "current global_step:" , sess.run(global_step))
print( "current w1:", sess.run([w1, ema.average(w1)]))

sess.run(ema_op)
print( "current global_step:" , sess.run(global_step))
print( "current w1:", sess.run([w1, ema.average(w1)]))

sess.run(ema_op)
print( "current global_step:" , sess.run(global_step))
print( "current w1:", sess.run([w1, ema.average(w1)]))

sess.run(ema_op)
print( "current global_step:" , sess.run(global_step))
print( "current w1:", sess.run([w1, ema.average(w1)]))

sess.run(ema_op)
print( "current global_step:" , sess.run(global_step))
print( "current w1:", sess.run([w1, ema.average(w1)]))

#更改 MOVING_AVERAGE_DECAY 为 0.1 看影子追随速度

正则化:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#coding:utf-8
#0导入模块 ,生成模拟数据集
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
BATCH_SIZE = 30
seed = 2
#基于seed产生随机数
rdm = np.random.RandomState(seed)
#随机数返回300行2列的矩阵,表示300组坐标点(x0,x1)作为输入数据集
X = rdm.randn(300,2)
#从X这个300行2列的矩阵中取出一行,判断如果两个坐标的平方和小于2,给Y赋值1,其余赋值0
#作为输入数据集的标签(正确答案)
Y_ = [int(x0*x0 + x1*x1 <2) for (x0,x1) in X]
#遍历Y中的每个元素,1赋值'red'其余赋值'blue',这样可视化显示时人可以直观区分
Y_c = [['red' if y else 'blue'] for y in Y_]
#对数据集X和标签Y进行shape整理,第一个元素为-1表示,随第二个参数计算得到,第二个元素表示多少列,把X整理为n行2列,把Y整理为n行1列
X = np.vstack(X).reshape(-1,2)
Y_ = np.vstack(Y_).reshape(-1,1)
print( X)
print( Y_)
print( Y_c)
#用plt.scatter画出数据集X各行中第0列元素和第1列元素的点即各行的(x0,x1),用各行Y_c对应的值表示颜色(c是color的缩写)
plt.scatter(X[:,0], X[:,1], c=np.squeeze(Y_c))
plt.show()


#定义神经网络的输入、参数和输出,定义前向传播过程
def get_weight(shape, regularizer):
w = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
return w

def get_bias(shape):
b = tf.Variable(tf.constant(0.01, shape=shape))
return b

x = tf.placeholder(tf.float32, shape=(None, 2))
y_ = tf.placeholder(tf.float32, shape=(None, 1))

w1 = get_weight([2,11], 0.01)
b1 = get_bias([11])
y1 = tf.nn.relu(tf.matmul(x, w1)+b1)

w2 = get_weight([11,1], 0.01)
b2 = get_bias([1])
y = tf.matmul(y1, w2)+b2


#定义损失函数
loss_mse = tf.reduce_mean(tf.square(y-y_))
loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))


#定义反向传播方法:不含正则化
train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_mse)

with tf.Session() as sess:
init_op = tf.global_variables_initializer()
sess.run(init_op)
STEPS = 40000
for i in range(STEPS):
start = (i*BATCH_SIZE) % 300
end = start + BATCH_SIZE
sess.run(train_step, feed_dict={x:X[start:end], y_:Y_[start:end]})
if i % 2000 == 0:
loss_mse_v = sess.run(loss_mse, feed_dict={x:X, y_:Y_})
print(("After %d steps, loss is: %f" %(i, loss_mse_v)))
#xx在-3到3之间以步长为0.01,yy在-3到3之间以步长0.01,生成二维网格坐标点
xx, yy = np.mgrid[-3:3:.01, -3:3:.01]
#将xx , yy拉直,并合并成一个2列的矩阵,得到一个网格坐标点的集合
grid = np.c_[xx.ravel(), yy.ravel()]
#将网格坐标点喂入神经网络 ,probs为输出
probs = sess.run(y, feed_dict={x:grid})
#probs的shape调整成xx的样子
probs = probs.reshape(xx.shape)
print( "w1:\n",sess.run(w1))
print( "b1:\n",sess.run(b1))
print( "w2:\n",sess.run(w2))
print( "b2:\n",sess.run(b2))

plt.scatter(X[:,0], X[:,1], c=np.squeeze(Y_c))
plt.contour(xx, yy, probs, levels=[.5])
plt.show()



#定义反向传播方法:包含正则化
train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_total)

with tf.Session() as sess:
init_op = tf.global_variables_initializer()
sess.run(init_op)
STEPS = 40000
for i in range(STEPS):
start = (i*BATCH_SIZE) % 300
end = start + BATCH_SIZE
sess.run(train_step, feed_dict={x: X[start:end], y_:Y_[start:end]})
if i % 2000 == 0:
loss_v = sess.run(loss_total, feed_dict={x:X,y_:Y_})
print(("After %d steps, loss is: %f" %(i, loss_v)))

xx, yy = np.mgrid[-3:3:.01, -3:3:.01]
grid = np.c_[xx.ravel(), yy.ravel()]
probs = sess.run(y, feed_dict={x:grid})
probs = probs.reshape(xx.shape)
print( "w1:\n",sess.run(w1))
print( "b1:\n",sess.run(b1))
print( "w2:\n",sess.run(w2))
print( "b2:\n",sess.run(b2))

plt.scatter(X[:,0], X[:,1], c=np.squeeze(Y_c))
plt.contour(xx, yy, probs, levels=[.5])
plt.show()

Figure_1.png

Figure_1-1.png

Figure_1-2.png