
python - TensorFlow seems to be using system memory, not the GPU, and the program stops after global_variables_initializer()

I just got a new GTX 1070 Founders Edition for my desktop, and I am trying to run TensorFlow on this new GPU. I am using tf.device() to run TensorFlow on my GPU, but it seems like this is not happening. Instead, it is using the CPU and almost all of my system's 8 GB of RAM. Here is my code:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.image as mpimg
import math

print("

")
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
#
with tf.device("/gpu:0"):
    # Helper Function To Print Percentage
    def showPercent(num, den, roundAmount):
        print(str(round((num / den) * roundAmount) / roundAmount) + " % ", end="\r")  # carriage return so the percentage updates in place
    # Define The Number Of Images To Get
    def getFile(dir, getEveryNthLine):
        allFiles = list(os.listdir(dir))
        fileNameList = []

        numOfFiles = len(allFiles)
        i = 0
        for fichier in allFiles:
            if(i % 100 == 0):
                showPercent(i, numOfFiles, 100)

            if(i % getEveryNthLine == 0):
                if(fichier.endswith(".png")):
                    fileNameList.append(dir + "/" + fichier[0:-4])
            i += 1
        return fileNameList

    # Other Helper Functions
    def init_weights(shape):
        init_random_dist = tf.truncated_normal(shape, stddev=0.1, dtype=tf.float16)
        return tf.Variable(init_random_dist)
    def init_bias(shape):
        init_bias_vals = tf.constant(0.1, shape=shape, dtype=tf.float16)
        return tf.Variable(init_bias_vals)
    def conv2d(x, W):
        # x --> [batch, H, W, Channels]
        # W --> [filter H, filter W, Channels IN, Channels Out]

        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")
    def max_pool_2by2(x):
        # x --> [batch, H, W, Channels]
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
    def convolutional_layer(input_x, shape):
        W = init_weights(shape)
        b = init_bias([ shape[3] ])
        return tf.nn.relu(conv2d(input_x, W) + b)
    def normal_full_layer(input_layer, size):
        input_size = int(input_layer.get_shape()[1])
        W = init_weights([input_size, size])
        b = init_bias([size])
        return tf.matmul(input_layer, W) + b

    print("Getting Images")
    fileNameList = getFile("F:cartoonset10k-small", 1000)
    print("
loaded " + str(len(fileNameList)) + " files")

    print("Defining Placeholders")
    x_ph = tf.placeholder(tf.float16, shape=[None, 400, 400, 4])
    y_ph = tf.placeholder(tf.float16, shape=[None])

    print("Defining Conv and Pool layer 1")
    convo_1 = convolutional_layer(x_ph, shape=[5, 5, 4, 32])
    convo_1_pooling = max_pool_2by2(convo_1)

    print("Defining Conv and Pool layer 2")
    convo_2 = convolutional_layer(convo_1_pooling, shape=[5, 5, 32, 64])
    convo_2_pooling = max_pool_2by2(convo_2)

    print("Define Flat later and a Full layer")
    convo_2_flat = tf.reshape(convo_2_pooling, [-1, 400 * 400 * 64])
    full_layer_one = tf.nn.relu(normal_full_layer(convo_2_flat, 1024))
    y_pred = full_layer_one # Add Dropout Later

    def getLabels(filePath):
        df = []
        with open(filePath, "r") as file:
            for line in list(file):
                tempList = line.replace("
", "").replace('"', "").replace(" ", "").split(",")
                df.append({
                    "attr": tempList[0],
                    "value":int(tempList[1]),
                    "maxValue":int(tempList[2])
                })
        return df

    print("
Splitting And Formating X, and Y Data")
    x_data = []
    y_data = []
    numOfFiles = len(fileNameList)
    i = 0
    for file in fileNameList:
        if i % 10 == 0:
            showPercent(i, numOfFiles, 100)
        x_data.append(mpimg.imread(file + ".png"))
        y_data.append(pd.DataFrame(getLabels(file + ".csv"))["value"][0])
        i += 1

    print("
Conveting x_data to list")
    i = 0
    for indx in range(len(x_data)):
        if i % 10 == 0:
            showPercent(i, numOfFiles, 100)
        x_data[indx] = x_data[indx].tolist()
        i += 1

    print("

Performing Train Test Split")
    train_x, test_x, train_y, test_y = train_test_split(x_data, y_data, test_size=0.2)

    print("Defining Loss And Optimizer")
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=y_ph,
            logits=y_pred
        )
    )
    optimizer = tf.train.AdadeltaOptimizer(learning_rate=0.001)
    train = optimizer.minimize(cross_entropy)

    print("Define Var Init")
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        print("Checkpoint Before Initializer")
        sess.run(init)
        print("Checkpoint After Initializer")
        batch_size = 8
        steps = 1
        i = 0
        for i in range(steps):
            if i % 10 == 0:
                print(i / 100, end="\r")

            # Sample one set of random indices so each image stays paired with its label
            batch_idx = np.random.randint(len(train_x), size=batch_size)
            batch_x = [train_x[j] for j in batch_idx]
            batch_y = [train_y[j] for j in batch_idx]
            print(sess.run(train, {
                x_ph: batch_x,
                y_ph: batch_y,
            }))

If you run this, the program seems to quit when I run global_variables_initializer(). It also prints in the terminal: Allocation of 20971520000 exceeds 10% of system memory. When looking at my Task Manager, I see this:

The program is using a lot of my CPU.

The program is using a lot of my memory.

The program is using none of my GPU.

I am not sure why this is happening. I am using an Anaconda environment and have installed tensorflow-gpu. I would really appreciate anyone's suggestions and help.

In addition, when I run this, the program stops after global_variables_initializer(). I am not sure if this is related to the problem above.

TensorFlow is version 1.12; CUDA is version 10.0.130.
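For reference, 20971520000 bytes happens to be exactly the size of the first fully connected layer's weight matrix in float16, given that the conv output is flattened to 400 * 400 * 64 values. A back-of-the-envelope check (not part of the program above):

weights = (400 * 400 * 64) * 1024   # 10,485,760,000 parameters in W
size_in_bytes = weights * 2         # float16 is 2 bytes per value -> 20,971,520,000

So the single dense layer asks for roughly 20 GB by itself.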

Help would be greatly appreciated.



1 Reply


Try comparing run times (GPU vs CPU) with this simple example:

import tensorflow as tf
mnist = tf.keras.datasets.mnist

(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

def create_model():
    model = tf.keras.models.Sequential([
      tf.keras.layers.Flatten(input_shape=(28, 28)),
      tf.keras.layers.Dense(512, activation=tf.nn.relu),
      tf.keras.layers.Dropout(0.2),
      tf.keras.layers.Dense(10, activation=tf.nn.softmax)
    ])
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model    

epoch = 3

print('GPU:')
with tf.device('/gpu:0'):   
    model = create_model()

    model.fit(x_train, y_train, epochs=epoch)

print('\nCPU:')
with tf.device('/cpu:0'):   
    model = create_model()

    model.fit(x_train, y_train, epochs=epoch)
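
If the GPU run is not faster, or TensorFlow cannot see the card at all, confirm device visibility directly. A quick sanity check with the TF 1.x API (a minimal sketch; the exact device names printed will vary by machine):

import tensorflow as tf
from tensorflow.python.client import device_lib

# A working tensorflow-gpu install lists a /device:GPU:0 entry here
print(device_lib.list_local_devices())
print(tf.test.is_gpu_available())  # True only if a CUDA-capable GPU is usable

# log_device_placement prints the device each op is assigned to at run time
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    a = tf.constant([1.0, 2.0])
    print(sess.run(a))

Also note that the Windows Task Manager GPU graphs default to the 3D engine and can show near-zero load during CUDA work; nvidia-smi gives a more reliable view of GPU utilization.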

