In a regression problem, the aim is to predict the output of a continuous value, such as a price or a probability. Contrast this with a classification problem, where the aim is to select a class from a list of classes (for example, given a picture that contains an apple or an orange, recognizing which fruit is in the picture).
This tutorial uses the classic Auto MPG dataset and demonstrates how to build models to predict the fuel efficiency of late-1970s and early-1980s automobiles. To do this, you will provide the models with a description of many automobiles from that time period. This description includes attributes like cylinders, displacement, horsepower, and weight.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Make NumPy printouts easier to read.
np.set_printoptions(precision=3, suppress=True)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']
raw_dataset = pd.read_csv(url, names=column_names,
                          na_values='?', comment='\t',
                          sep=' ', skipinitialspace=True)
dataset = raw_dataset.copy()  # use copy() rather than plain assignment so the raw data is not modified
dataset.tail()
|     | MPG  | Cylinders | Displacement | Horsepower | Weight | Acceleration | Model Year | Origin |
|-----|------|-----------|--------------|------------|--------|--------------|------------|--------|
| 393 | 27.0 | 4         | 140.0        | 86.0       | 2790.0 | 15.6         | 82         | 1      |
| 394 | 44.0 | 4         | 97.0         | 52.0       | 2130.0 | 24.6         | 82         | 2      |
| 395 | 32.0 | 4         | 135.0        | 84.0       | 2295.0 | 11.6         | 82         | 1      |
| 396 | 28.0 | 4         | 120.0        | 79.0       | 2625.0 | 18.6         | 82         | 1      |
| 397 | 31.0 | 4         | 119.0        | 82.0       | 2720.0 | 19.4         | 82         | 1      |
dataset.isna().sum()
MPG             0
Cylinders       0
Displacement    0
Horsepower      6
Weight          0
Acceleration    0
Model Year      0
Origin          0
dtype: int64
The 'Horsepower' column contains six unknown values. Drop those rows to keep this tutorial simple.
dataset = dataset.dropna()
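If you preferred to keep those six rows instead of dropping them, a common alternative (not used in this tutorial; shown here only as a hedged sketch) is to impute the missing values, for example with the column median:

# Hypothetical alternative to dropna(): fill missing 'Horsepower' values with
# the column median instead of dropping the rows (not executed in this tutorial).
dataset['Horsepower'] = dataset['Horsepower'].fillna(dataset['Horsepower'].median())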
The "Origin" column is categorical, not numeric. So the next step is to one-hot encode the values in the column with pd.get_dummies.
dataset['Origin'].unique()
array([1, 3, 2])
origin_map = {1: 'USA', 2: 'Europe', 3: 'Japan'}  # avoid shadowing the built-in dict
dataset['Origin'] = dataset['Origin'].map(origin_map)
dataset['Origin']
0        USA
1        USA
2        USA
3        USA
4        USA
        ...
393      USA
394   Europe
395      USA
396      USA
397      USA
Name: Origin, Length: 392, dtype: object
# One-hot encode the 'Origin' column into separate indicator columns
dataset = pd.get_dummies(dataset, columns=['Origin'], prefix='', prefix_sep='')
Now, split the dataset into a training set and a test set. You will use the test set in the final evaluation of the models.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)
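As a quick sanity check (this line is an addition; the counts follow from the 392 rows remaining after dropna()), the 80/20 split leaves 314 rows for training and 78 for testing:

# Verify the split sizes: sample(frac=0.8) keeps roughly 80% of the rows.
print(len(train_dataset), len(test_dataset))  # 314 78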
Review the joint distribution of a few pairs of columns from the training set.
The top row suggests that the fuel efficiency (MPG) is a function of all the other parameters. The other rows indicate they are functions of each other.
sns.pairplot(train_dataset[['MPG', 'Cylinders', 'Displacement', 'Weight']], diag_kind='kde')
<seaborn.axisgrid.PairGrid at 0x7fd29679a2e0>
train_dataset.describe().transpose()
|              | count | mean        | std        | min    | 25%     | 50%    | 75%     | max    |
|--------------|-------|-------------|------------|--------|---------|--------|---------|--------|
| MPG          | 314.0 | 23.310510   | 7.728652   | 10.0   | 17.00   | 22.0   | 28.95   | 46.6   |
| Cylinders    | 314.0 | 5.477707    | 1.699788   | 3.0    | 4.00    | 4.0    | 8.00    | 8.0    |
| Displacement | 314.0 | 195.318471  | 104.331589 | 68.0   | 105.50  | 151.0  | 265.75  | 455.0  |
| Horsepower   | 314.0 | 104.869427  | 38.096214  | 46.0   | 76.25   | 94.5   | 128.00  | 225.0  |
| Weight       | 314.0 | 2990.251592 | 843.898596 | 1649.0 | 2256.50 | 2822.5 | 3608.00 | 5140.0 |
| Acceleration | 314.0 | 15.559236   | 2.789230   | 8.0    | 13.80   | 15.5   | 17.20   | 24.8   |
| Model Year   | 314.0 | 75.898089   | 3.675642   | 70.0   | 73.00   | 76.0   | 79.00   | 82.0   |
| Europe       | 314.0 | 0.178344    | 0.383413   | 0.0    | 0.00    | 0.0    | 0.00    | 1.0    |
| Japan        | 314.0 | 0.197452    | 0.398712   | 0.0    | 0.00    | 0.0    | 0.00    | 1.0    |
| USA          | 314.0 | 0.624204    | 0.485101   | 0.0    | 0.00    | 1.0    | 1.00    | 1.0    |
Separate the target value, the 'MPG' label, from the features. This label is the value that the models will be trained to predict.
train_features = train_dataset.copy()
test_features = test_dataset.copy()
train_labels = train_features.pop('MPG')  # extract the 'MPG' column as a new Series
test_labels = test_features.pop('MPG')
train_dataset.describe().transpose()[['mean', 'std']]
|              | mean        | std        |
|--------------|-------------|------------|
| MPG          | 23.310510   | 7.728652   |
| Cylinders    | 5.477707    | 1.699788   |
| Displacement | 195.318471  | 104.331589 |
| Horsepower   | 104.869427  | 38.096214  |
| Weight       | 2990.251592 | 843.898596 |
| Acceleration | 15.559236   | 2.789230   |
| Model Year   | 75.898089   | 3.675642   |
| Europe       | 0.178344    | 0.383413   |
| Japan        | 0.197452    | 0.398712   |
| USA          | 0.624204    | 0.485101   |
The features have very different ranges and scales (note the 'mean' and 'std' values above), so it is good practice to normalize them. The tf.keras.layers.Normalization layer provides a clean way to build that preprocessing into the model. The first step is to create the layer:
normalizer = tf.keras.layers.Normalization(axis = -1)
Then, fit the state of the preprocessing layer to the data by calling Normalization.adapt:
normalizer.adapt(np.array(train_features))
print(normalizer.mean.numpy())
[[ 5.478 195.318 104.869 2990.252 15.559 75.898 0.178 0.197 0.624]]
first = np.array(train_features[:1])
with np.printoptions(precision=2, suppress=True):
    print('First example:', first)
    print()
    print('Normalized:', normalizer(first).numpy())
First example: [[ 4. 90. 75. 2125. 14.5 74. 0. 0. 1. ]]
Normalized: [[-0.87 -1.01 -0.79 -1.03 -0.38 -0.52 -0.47 -0.5 0.78]]
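As another quick check (an addition to the tutorial, relying on the mean and variance attributes the Normalization layer stores after adapt), the same normalized values can be reproduced directly from the layer's statistics:

# Reproduce the normalized values from the adapted statistics.
manual = (first - normalizer.mean.numpy()) / np.sqrt(normalizer.variance.numpy())
print(manual)  # should match normalizer(first).numpy()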
Start with a single-variable linear regression of MPG on Horsepower:

$$MPG = \beta_0 + \beta_1 \, Horsepower + \epsilon$$

horsepower = np.array(train_features['Horsepower'])
# initialise the normalization layer for a single feature
horsepower_normalizer = layers.Normalization(input_shape=[1,], axis=None)
horsepower_normalizer.adapt(horsepower)  # learn the feature's mean and variance
Build the Keras Sequential model:
horsepower_model = tf.keras.Sequential([
    horsepower_normalizer,   # normalize the input first
    layers.Dense(units=1)    # a single linear output: y = w*x + b
])
# display the model architecture
horsepower_model.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= normalization_2 (Normalizat (None, 1) 3 ion) dense (Dense) (None, 1) 2 ================================================================= Total params: 5 Trainable params: 2 Non-trainable params: 3 _________________________________________________________________
This model will predict 'MPG' from 'Horsepower'. Run the untrained model on the first 10 'Horsepower' values; the output won't be good yet, but it has the expected shape of (10, 1):
horsepower_model.predict(horsepower[:10])
1/1 [==============================] - 0s 18ms/step
array([[-0.83 ], [-0.469], [ 1.533], [-1.164], [-1.053], [-0.413], [-1.248], [-1.053], [-0.274], [-0.469]], dtype=float32)
# Compare with a manual standardization of the same values. Note this uses the
# batch's own mean and variance rather than the adapted statistics, and the model
# also applies its (still untrained) Dense weights, so the numbers differ.
(horsepower[:10] - horsepower[:10].mean()) / np.sqrt(horsepower[:10].var())
array([-0.373, 0.098, 2.705, -0.808, -0.663, 0.17 , -0.916, -0.663, 0.351, 0.098])
Once the model is built, configure the training procedure using the Keras Model.compile method. The most important arguments to compile are the loss and the optimizer, since these define what will be optimized (mean_absolute_error) and how (using tf.keras.optimizers.Adam).
horsepower_model.compile(
optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
loss='mean_absolute_error')
Use Keras Model.fit to execute the training for 100 epochs:
%%time
history = horsepower_model.fit(
    train_features['Horsepower'],
    train_labels,
    epochs=100,
    # Suppress logging.
    verbose=0,
    # Calculate validation results on 20% of the training data.
    validation_split=0.2)
CPU times: user 3.04 s, sys: 329 ms, total: 3.37 s Wall time: 2.98 s
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()
|    | loss     | val_loss | epoch |
|----|----------|----------|-------|
| 95 | 3.806144 | 4.163908 | 95    |
| 96 | 3.814678 | 4.198698 | 96    |
| 97 | 3.801707 | 4.187294 | 97    |
| 98 | 3.805012 | 4.188009 | 98    |
| 99 | 3.805693 | 4.181911 | 99    |
def plot_loss(history):
    plt.plot(history.history['loss'], label='loss')
    plt.plot(history.history['val_loss'], label='val_loss')
    plt.ylim([0, 10])
    plt.xlabel('Epoch')
    plt.ylabel('Error [MPG]')
    plt.legend()
    plt.grid(True)
plot_loss(history)
Collect the results on the test set for later:
test_results = {}
test_results['horsepower_model'] = horsepower_model.evaluate(
test_features['Horsepower'],
test_labels, verbose=0)
Since this is a single variable regression, it's easy to view the model's predictions as a function of the input:
x = tf.linspace(0.0, 250, 251)
y = horsepower_model.predict(x)
8/8 [==============================] - 0s 1ms/step
def plot_horsepower(x, y):
    plt.scatter(train_features['Horsepower'], train_labels, label='Data')
    plt.plot(x, y, color='k', label='Predictions')
    plt.xlabel('Horsepower')
    plt.ylabel('MPG')
    plt.legend()
plot_horsepower(x, y)
Next, use an almost identical setup to make predictions based on all the inputs. This model still performs the same $y = mx + b$, except that $m$ is a matrix and $b$ is a vector.
linear_model = tf.keras.Sequential([
normalizer,
layers.Dense(units=1)
])
When you call Model.predict on a batch of inputs, it produces units=1 outputs for each example:
len(train_features.columns)
9
linear_model.predict(train_features[:5])
# each row is an example and each column a feature:
# 9 features per example, 5 examples, one prediction per example
1/1 [==============================] - 0s 17ms/step
array([[ 1.222], [ 0.224], [-0.675], [-0.322], [ 1.619]], dtype=float32)
When you call the model, its weight matrices will be built. Check that the kernel weights (the $m$ in $y=mx+b$) have a shape of (9, 1):
linear_model.layers[1].kernel
<tf.Variable 'dense_1/kernel:0' shape=(9, 1) dtype=float32, numpy= array([[-0.238], [-0.464], [-0.31 ], [ 0.076], [ 0.257], [-0.731], [-0.522], [ 0.713], [ 0.27 ]], dtype=float32)>
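To make the matrix form of $y = mx + b$ concrete, here is a small sanity check (an addition to the tutorial, using the layers defined above) that reproduces the untrained model's output with an explicit matrix multiply:

# The model's output is just normalized inputs times the kernel, plus the bias.
w = linear_model.layers[1].kernel.numpy()   # shape (9, 1)
b = linear_model.layers[1].bias.numpy()     # shape (1,)
x_norm = normalizer(np.array(train_features[:5], dtype=np.float32)).numpy()
print(x_norm @ w + b)  # matches linear_model.predict(train_features[:5])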
Configure the model with Keras Model.compile and train with Model.fit for 100 epochs:
linear_model.compile(
optimizer = tf.keras.optimizers.Adam(learning_rate = 0.1),
loss = 'mean_absolute_error'
)
%%time
history = linear_model.fit(
    train_features,
    train_labels,
    epochs=100,
    # Suppress logging.
    verbose=0,
    # Calculate validation results on 20% of the training data.
    validation_split=0.2)
CPU times: user 3.18 s, sys: 322 ms, total: 3.5 s Wall time: 3.09 s
plot_loss(history)
test_results['linear_model'] = linear_model.evaluate(
test_features, test_labels, verbose=0)
Next, implement deep neural network (DNN) models for regression. The code is basically the same, except the model is expanded to include some "hidden" non-linear layers.
The name "hidden" here just means not directly connected to the inputs or outputs.
These models will contain a few more layers than the linear model:
* The normalization layer, as before (with horsepower_normalizer for a single-input model and normalizer for a multiple-input model).
* Two hidden, non-linear Dense layers with the ReLU (relu) activation function.
* A linear Dense single-output layer.

Both models will use the same training procedure, so the compile method is included in the build_and_compile_model function below.
def build_and_compile_model(norm):
    model = keras.Sequential([
        # normalization layer
        norm,
        # two hidden layers
        layers.Dense(64, activation='relu'),
        layers.Dense(64, activation='relu'),
        # one output
        layers.Dense(1)
    ])
    model.compile(loss='mean_absolute_error',
                  optimizer=tf.keras.optimizers.Adam(0.001))
    return model
Create a DNN model with only 'Horsepower' as input and horsepower_normalizer (defined earlier) as the normalization layer:
# call build_and_compile_model, passing the one-dimensional horsepower_normalizer
dnn_horsepower_model = build_and_compile_model(horsepower_normalizer)
dnn_horsepower_model.summary()
Model: "sequential_7" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= normalization_2 (Normalizat (None, 1) 3 ion) dense_17 (Dense) (None, 64) 128 dense_18 (Dense) (None, 64) 4160 dense_19 (Dense) (None, 1) 65 ================================================================= Total params: 4,356 Trainable params: 4,353 Non-trainable params: 3 _________________________________________________________________
Train the model with Keras Model.fit:
%%time
history = dnn_horsepower_model.fit(
    train_features['Horsepower'],
    train_labels,
    validation_split=0.2,
    verbose=0, epochs=100)
CPU times: user 3.36 s, sys: 444 ms, total: 3.8 s Wall time: 3.17 s
This model does slightly better than the linear single-input horsepower_model:
plot_loss(history)
If you plot the predictions as a function of 'Horsepower', you should notice how this model takes advantage of the nonlinearity provided by the hidden layers:
x = tf.linspace(0.0, 250, 251)
y = dnn_horsepower_model.predict(x)
8/8 [==============================] - 0s 1ms/step
plot_horsepower(x,y)
Collect the results on the test set for later:
test_results['dnn_horsepower_model'] = dnn_horsepower_model.evaluate(
test_features['Horsepower'], test_labels,
verbose=0)
Repeat the previous process using all the inputs. The model's performance slightly improves on the validation dataset.
# call the previously defined function, passing the multi-feature normalizer
dnn_model = build_and_compile_model(normalizer)
dnn_model.summary()
Model: "sequential_8" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= normalization_1 (Normalizat (None, 9) 19 ion) dense_20 (Dense) (None, 64) 640 dense_21 (Dense) (None, 64) 4160 dense_22 (Dense) (None, 1) 65 ================================================================= Total params: 4,884 Trainable params: 4,865 Non-trainable params: 19 _________________________________________________________________
%%time
history = dnn_model.fit(
    train_features,
    train_labels,
    validation_split=0.2,
    verbose=0, epochs=100)
CPU times: user 3.42 s, sys: 465 ms, total: 3.88 s Wall time: 3.2 s
plot_loss(history)
Collect the results on the test set for later:
test_results['dnn_model'] = dnn_model.evaluate(test_features, test_labels, verbose=0)
pd.DataFrame(test_results, index=['Mean absolute error [MPG]']).T
|                      | Mean absolute error [MPG] |
|----------------------|---------------------------|
| horsepower_model     | 3.652504                  |
| linear_model         | 2.514716                  |
| dnn_horsepower_model | 2.910568                  |
| dnn_model            | 1.640190                  |
We find that dnn_model has the lowest mean absolute error. You can now make predictions with it on the test set and compare them against the true values:
test_predictions = dnn_model.predict(test_features).flatten()
a = plt.axes(aspect='equal')
plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values [MPG]')
plt.ylabel('Predictions [MPG]')
lims = [0, 50]
plt.xlim(lims)
plt.ylim(lims)
plt.plot(lims, lims)
3/3 [==============================] - 0s 2ms/step
[<matplotlib.lines.Line2D at 0x7fd29c433b80>]
Next, check the error distribution:
error = test_predictions - test_labels
plt.hist(error, bins=25)
plt.xlabel('Prediction Error [MPG]')
_ = plt.ylabel('Count')
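For a numeric summary to go with the histogram (a small addition, using the error values computed above):

# Summarize the prediction errors numerically.
print(pd.Series(error).describe())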
If you're happy with the model, save it for later use with Model.save:
dnn_model.save('dnn_model')
INFO:tensorflow:Assets written to: dnn_model/assets
To reload the model:
reloaded = tf.keras.models.load_model('dnn_model')
test_results['reloaded'] = reloaded.evaluate(
test_features, test_labels, verbose=0)
pd.DataFrame(test_results, index=['Mean absolute error [MPG]']).T
|                      | Mean absolute error [MPG] |
|----------------------|---------------------------|
| horsepower_model     | 3.652504                  |
| linear_model         | 2.514716                  |
| dnn_horsepower_model | 2.910568                  |
| dnn_model            | 1.640190                  |
| reloaded             | 1.640190                  |
We find that the reloaded model gives the same result as the original dnn_model.
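As a final hedged example (the feature values below are made up purely for illustration; the column names come from train_features), the reloaded model can be used directly for inference on new rows:

# Predict MPG for one hypothetical car with the reloaded model.
sample = pd.DataFrame([{
    'Cylinders': 4, 'Displacement': 140.0, 'Horsepower': 90.0,
    'Weight': 2500.0, 'Acceleration': 15.0, 'Model Year': 80,
    'Europe': 0, 'Japan': 0, 'USA': 1,
}], columns=train_features.columns)
print(reloaded.predict(sample))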