diff --git a/src/py3.x/tensorflow2.x/EmotionData.xlsx b/src/py3.x/tensorflow2.x/EmotionData.xlsx new file mode 100644 index 00000000..9f4083a2 Binary files /dev/null and b/src/py3.x/tensorflow2.x/EmotionData.xlsx differ diff --git a/src/py3.x/tensorflow2.x/EmotionData的副本.xlsx b/src/py3.x/tensorflow2.x/EmotionData的副本.xlsx new file mode 100644 index 00000000..5ae205fd Binary files /dev/null and b/src/py3.x/tensorflow2.x/EmotionData的副本.xlsx differ diff --git a/src/py3.x/tensorflow2.x/config.py b/src/py3.x/tensorflow2.x/config.py index ed410bd3..985fc173 100644 --- a/src/py3.x/tensorflow2.x/config.py +++ b/src/py3.x/tensorflow2.x/config.py @@ -7,9 +7,14 @@ class Config(object): poetry_file = 'poetry.txt' - weight_file = 'poetry_model.h5' + data_file = 'EmotionData.xlsx' + model_file = 'EmotionModel.h5' + vocab_list = 'vocal_list.pkl' + word_index = 'word_index.pkl' # 根据前六个字预测第七个字 max_len = 6 batch_size = 512 learning_rate = 0.001 - pre_num = 2 \ No newline at end of file + pre_num = 3 + MAX_SEQUENCE_LENGTH = 1000 # 每个文本或者句子的截断长度,只保留1000个单词 + EMBEDDING_DIM = 60 # 词向量维度 diff --git a/src/py3.x/tensorflow2.x/test.ipynb b/src/py3.x/tensorflow2.x/test.ipynb new file mode 100644 index 00000000..8473f584 --- /dev/null +++ b/src/py3.x/tensorflow2.x/test.ipynb @@ -0,0 +1,190 @@ +{ + "nbformat": 4, + "nbformat_minor": 2, + "metadata": { + "language_info": { + "name": "python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "version": "3.6.3" + }, + "orig_nbformat": 2, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "npconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": "/Users/jiangzl/.virtualenvs/python3.6/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n from ._conv import register_converters as _register_converters\nUsing TensorFlow backend.\n" + } + ], + "source": [ + "import sys\n", + "# 加载自定义包(添加:中间件)\n", + "sys.path.append(\"src/py3.x/tensorflow2.x\")\n", + "from text_Emotion import *" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "outfile = \"/opt/data/开源词向量/gensim_word2vec_60/Word60.model\"\n", + "# 加载词向量\n", + "Word2VecModel = loadMyWord2Vec(outfile)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": "空间的词向量(60 维): (60,) [ 2.2506642 -1.7324443 0.35593075 -3.7236977 -0.6317619 2.1253817\n -0.8911206 0.61192095 -2.5709946 5.6513844 2.3008282 -4.102604\n -0.61898416 -1.1190889 -6.060641 2.3529105 1.8131357 2.0764832\n -2.102738 -0.414962 -2.0553887 0.37966883 -2.015982 -1.4542716\n 3.191199 0.3265181 0.7307454 1.4761372 -2.2383723 0.925493\n 6.2617674 -1.3852879 0.6405419 -0.5601632 -1.084447 5.689829\n 0.46593904 -2.824275 4.2015862 -0.87934065 1.518804 -1.493514\n -1.9851282 -0.63166183 0.96814466 1.6375747 1.1566993 1.1981301\n 0.7950756 -3.0055897 1.2649575 1.2099069 1.9403213 1.3719954\n 2.6494706 1.8465079 -0.5507954 -2.3987298 -1.8990258 -4.651662 ]\n打印与空间最相近的5个词语: [('物件', 0.7354965806007385), ('维度', 0.7326242923736572), ('自由空间', 0.7247114181518555), ('拓扑', 0.7112817764282227), ('三维空间', 0.7062257528305054)]\n加载词向量结束..\n" + } + ], + "source": [ + "embeddings_matrix = load_embeding()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": "--: [[ 0. 0. 0. ... 0. 0.\n 0. ]\n [ 3.6153059 2.63272738 -0.98327219 ... 0.03685202 -0.78566265\n 1.06350613]\n [ 0.21444647 2.58100891 0.08306306 ... -0.43973923 -0.2102039\n -1.37015963]\n ...\n [-1.07420349 1.90465117 2.2614491 ... -1.90614116 -0.34697708\n -2.43622112]\n [ 1.53204441 0.60434735 -0.02905927 ... -0.04591536 -0.63762575\n 0.29778937]\n [ 0.20260553 0.03990031 -0.22745971 ... -0.17701624 0.16334218\n 0.06799572]]\n" + } + ], + "source": [ + "print('--: ', embeddings_matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "import os\n", + "import keras\n", + "import random\n", + "import gensim\n", + "import numpy as np\n", + "import pandas as pd\n", + "from keras import Model\n", + "from keras.models import load_model\n", + "from keras.layers import Dropout, Dense, Flatten, Bidirectional, Embedding, GRU, Input\n", + "from keras.optimizers import Adam\n", + "# 该目录下的 config.py文件, 数据文件是: poetry.txt\n", + "from config import Config\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": "空间的词向量(60 维): (60,) [ 2.2506642 -1.7324443 0.35593075 -3.7236977 -0.6317619 2.1253817\n -0.8911206 0.61192095 -2.5709946 5.6513844 2.3008282 -4.102604\n -0.61898416 -1.1190889 -6.060641 2.3529105 1.8131357 2.0764832\n -2.102738 -0.414962 -2.0553887 0.37966883 -2.015982 -1.4542716\n 3.191199 0.3265181 0.7307454 1.4761372 -2.2383723 0.925493\n 6.2617674 -1.3852879 0.6405419 -0.5601632 -1.084447 5.689829\n 0.46593904 -2.824275 4.2015862 -0.87934065 1.518804 -1.493514\n -1.9851282 -0.63166183 0.96814466 1.6375747 1.1566993 1.1981301\n 0.7950756 -3.0055897 1.2649575 1.2099069 1.9403213 1.3719954\n 2.6494706 1.8465079 -0.5507954 -2.3987298 -1.8990258 -4.651662 ]\n打印与空间最相近的5个词语: [('物件', 0.7354965806007385), ('维度', 0.7326242923736572), ('自由空间', 0.7247114181518555), ('拓扑', 0.7112817764282227), ('三维空间', 0.7062257528305054)]\n加载词向量结束..\n" + }, + { + "ename": "NameError", + "evalue": "name 'load_data' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mEmotionModel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mConfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/opt/git/AiLearning/src/py3.x/tensorflow2.x/text_Emotion.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, config)\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msummary\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 76\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 77\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 78\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 79\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mbuild_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0membeddings_matrix\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/git/AiLearning/src/py3.x/tensorflow2.x/text_Emotion.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[0;34m'''训练模型'''\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[0membeddings_matrix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_embeding\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 125\u001b[0;31m \u001b[0mx_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx_val\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_val\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 126\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuild_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0membeddings_matrix\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 127\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m128\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepochs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalidation_data\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx_val\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_val\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'load_data' is not defined" + ] + } + ], + "source": [ + "model = EmotionModel(Config)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
labelcomment
01距离川沙公路较近,但是公交指示不对,如果是\"蔡陆线\"的话,会非常麻烦.建议用别的路线.房间较...
11商务大床房,房间很大,床有2M宽,整体感觉经济实惠不错!
21早餐太差,无论去多少人,那边也不加食品的。酒店应该重视一下这个问题了。房间本身很好。
31宾馆在小街道上,不大好找,但还好北京热心同胞很多~宾馆设施跟介绍的差不多,房间很小,确实挺小...
41CBD中心,周围没什么店铺,说5星有点勉强.不知道为什么卫生间没有电吹风
51总的来说,这样的酒店配这样的价格还算可以,希望他赶快装修,给我的客人留些好的印象
61价格比比较不错的酒店。这次免费升级了,感谢前台服务员。房子还好,地毯是新的,比上次的好些。早...
71不错,在同等档次酒店中应该是值得推荐的!
81入住丽晶,感觉很好。因为是新酒店,的确有淡淡的油漆味,房间内较新。房间大小合适,卫生间设备齐...
911。酒店比较新,装潢和设施还不错,只是房间有些油漆味。2。早餐还可以,只是品种不是很多。3。...
\n
", + "text/plain": " label comment\n0 1 距离川沙公路较近,但是公交指示不对,如果是\"蔡陆线\"的话,会非常麻烦.建议用别的路线.房间较...\n1 1 商务大床房,房间很大,床有2M宽,整体感觉经济实惠不错!\n2 1 早餐太差,无论去多少人,那边也不加食品的。酒店应该重视一下这个问题了。房间本身很好。\n3 1 宾馆在小街道上,不大好找,但还好北京热心同胞很多~宾馆设施跟介绍的差不多,房间很小,确实挺小...\n4 1 CBD中心,周围没什么店铺,说5星有点勉强.不知道为什么卫生间没有电吹风\n5 1 总的来说,这样的酒店配这样的价格还算可以,希望他赶快装修,给我的客人留些好的印象\n6 1 价格比比较不错的酒店。这次免费升级了,感谢前台服务员。房子还好,地毯是新的,比上次的好些。早...\n7 1 不错,在同等档次酒店中应该是值得推荐的!\n8 1 入住丽晶,感觉很好。因为是新酒店,的确有淡淡的油漆味,房间内较新。房间大小合适,卫生间设备齐...\n9 1 1。酒店比较新,装潢和设施还不错,只是房间有些油漆味。2。早餐还可以,只是品种不是很多。3。..." + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_excel(\"src/py3.x/tensorflow2.x/EmotionData.xlsx\", header=0, error_bad_lines=False, encoding=\"utf_8_sig\")\n", + "df.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": "[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]" + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y = df[\"label\"].tolist()\n", + "y[:10]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "def func(line, ngrams=[]):\n", + " # 加入我们的组合词,保证分词的准确性\n", + " if ngrams != []:\n", + " for word in ngrams:\n", + " jieba.add_word(\"\".join(word.lower()))\n", + " # # 将文本 ['1, 2, 3', '1, 2, .., n'] 分解为: [[1, 2, 3], [1, 2, .., n]]\n", + " words = [word for word in jieba.cut(str(line).lower(), cut_all=False)]\n", + " # print(\">>> \", train)\n", + " return \" \".join(words)\n", + "x = df[\"comment\"].apply(lambda line: func(line))\n", + "y" + ] + } + ] +} \ No newline at end of file diff --git a/src/py3.x/tensorflow2.x/text_Emotion.py b/src/py3.x/tensorflow2.x/text_Emotion.py index 87041ff1..6b4b8986 100644 --- a/src/py3.x/tensorflow2.x/text_Emotion.py +++ b/src/py3.x/tensorflow2.x/text_Emotion.py @@ -4,18 +4,37 @@ # https://blog.csdn.net/xiezj007/article/details/85073890 # https://www.cnblogs.com/Darwin2000/p/5786984.html # https://ai.tencent.com/ailab/nlp/embedding.html +# 数据集: +# https://blog.csdn.net/alip39/article/details/95891321 import re import os import keras import random import gensim import numpy as np +import pandas as pd +import jieba +from sklearn.model_selection import train_test_split from keras import Model from keras.models import load_model from keras.layers import Dropout, Dense, Flatten, Bidirectional, Embedding, GRU, Input +from keras.preprocessing.sequence import pad_sequences +from keras.utils.np_utils import to_categorical from keras.optimizers import Adam -# 该目录下的 config.py文件, 数据文件是: poetry.txt from config import Config +import pickle + + +# 存储模型: 持久化 +def load_pkl(filename): + with open(filename, 'rb') as fr: + model = pickle.load(fr) + return model + + +def save_pkl(model, filename): + with open(filename, 'wb') as fw: + pickle.dump(model, fw) ## 训练自己的词向量,并保存。 @@ -34,7 +53,7 @@ def loadMyWord2Vec(outfile): def load_embeding(): # 训练词向量(用空格隔开的文本) infile = "./CarCommentAll_cut.csv" - outfile = "/opt/data/开源词向量/gensim_word2vec_60/Word60.model" + outfile = "gensim_word2vec_60/Word60.model" # trainWord2Vec(infile, outfile) # 加载词向量 Word2VecModel = loadMyWord2Vec(outfile) @@ -60,7 +79,7 @@ def load_embeding(): word_vector[word] = Word2VecModel.wv[word] # 词语:词向量 embeddings_matrix[i + 1] = Word2VecModel.wv[word] # 词向量矩阵 print("加载词向量结束..") - return embeddings_matrix + return vocab_list, word_index, embeddings_matrix class EmotionModel(object): @@ -68,24 +87,26 @@ class EmotionModel(object): self.model = None self.config = config self.pre_num = self.config.pre_num + self.data_file = self.config.data_file + self.vocab_list = self.config.vocab_list + self.word_index = self.config.word_index + self.EMBEDDING_DIM = self.config.EMBEDDING_DIM + self.MAX_SEQUENCE_LENGTH = self.config.MAX_SEQUENCE_LENGTH # 如果模型文件存在则直接加载模型,否则开始训练 - if os.path.exists(self.config.weight_file): - self.model = load_model(self.config.weight_file) + if os.path.exists(self.config.model_file): + self.model = load_model(self.config.model_file) self.model.summary() else: self.train() def build_model(self, embeddings_matrix): - MAX_SEQUENCE_LENGTH = 1000 # 每个文本或者句子的截断长度,只保留1000个单词 - ## 4 在 keras的Embedding层中使用 预训练词向量 - EMBEDDING_DIM = 100 # 词向量维度 embedding_layer = Embedding( input_dim = len(embeddings_matrix), # 字典长度 - output_dim = EMBEDDING_DIM, # 词向量 长度(100) + output_dim = self.EMBEDDING_DIM, # 词向量 长度(60) weights = [embeddings_matrix], # 重点:预训练的词向量系数 - input_length = MAX_SEQUENCE_LENGTH, # 每句话的 最大长度(必须padding) + input_length = self.MAX_SEQUENCE_LENGTH, # 每句话的 最大长度(必须padding) trainable = False # 是否在 训练的过程中 更新词向量 ) # 如果不加载外界的,可以自己训练 @@ -98,43 +119,95 @@ class EmotionModel(object): print("开始训练模型.....") # 使用 - sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32') # 返回一个张量,长度为1000,也就是模型的输入为batch_size*1000 + sequence_input = Input(shape=(self.MAX_SEQUENCE_LENGTH,), dtype='int32') # 返回一个张量,长度为1000,也就是模型的输入为batch_size*1000 embedded_sequences = embedding_layer(sequence_input) # 返回batch_size*1000*100 x = Bidirectional(GRU(100, return_sequences=True))(embedded_sequences) x = Dropout(0.6)(x) x = Flatten()(x) - preds = Dense(len(self.pre_num), activation='softmax')(x) - model = Model(sequence_input, preds) + preds = Dense(self.pre_num, activation='softmax')(x) + self.model = Model(sequence_input, preds) # 设置优化器 optimizer = Adam(lr=self.config.learning_rate) - model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy']) - model.summary() + self.model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy']) + self.model.summary() - def predict(self, x_pred): + def load_word2jieba(self): + vocab_list = load_pkl(self.vocab_list) + if vocab_list != []: + for word in vocab_list: + jieba.add_word(word) + + def predict(self, line): '''预测''' - # x_pred - res = self.model.predict(x_pred, verbose=0)[0] + word_index = load_pkl(self.word_index) + STOPWORDS = ["-", "\t", "\n", ".", "。", ",", ",", ";", "!", "!", "?", "?", "%"] + words = [word for word in jieba.cut(str(line), cut_all=False) if word not in STOPWORDS] + indexs = [word_index.get(word, 0) for word in words] + x_pred = pad_sequences([indexs], maxlen=self.MAX_SEQUENCE_LENGTH) + res = self.model.predict(x_pred, verbose=0)[0] return res - def load_data(): - pass + def load_data(self, word_index, vocab_list, test_size=0.25): + STOPWORDS = ["-", "\t", "\n", ".", "。", ",", ",", ";", "!", "!", "?", "?", "%"] + if vocab_list != []: + for word in vocab_list: + jieba.add_word(word) + + def func(line): + # 将文本 ['1, 2, 3', '1, 2, .., n'] 分解为: [[1, 2, 3], [1, 2, .., n]] + words = [word for word in jieba.cut(str(line), cut_all=False) if word not in STOPWORDS] + indexs = [word_index.get(word, 0) for word in words] + return indexs + + df = pd.read_excel(self.data_file, header=0, error_bad_lines=False, encoding="utf_8_sig") + x = df["comment"].apply(lambda line: func(line)).tolist() + x = pad_sequences(x, maxlen=self.MAX_SEQUENCE_LENGTH) + y = df["label"].tolist() + # 按照大小和顺序,生成 label(0,1,2...自然数类型) + """ + In [7]: to_categorical(np.asarray([1,1,0,1,3])) + Out[7]: + array([[0., 1., 0., 0.], + [0., 1., 0., 0.], + [1., 0., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 0., 1.]], dtype=float32) + """ + y = to_categorical(np.asarray(y)) + x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test_size, random_state=10000) + return (x_train, y_train), (x_test, y_test) + def train(self): '''训练模型''' - embeddings_matrix = load_embeding() - x_train, y_train, x_val, y_val = load_data() + vocab_list, word_index, embeddings_matrix = load_embeding() + save_pkl(vocab_list, self.vocab_list) + save_pkl(word_index, self.word_index) + (x_train, y_train), (x_test, y_test) = self.load_data(word_index, vocab_list) + print("---------") + print(x_train[:3], "\n", y_train[:3]) + print("\n") + print(x_test[:3], "\n", y_test[:3]) + print("---------") self.build_model(embeddings_matrix) - self.model.fit(x_train, y_train, batch_size=128, epochs=10, validation_data=(x_val, y_val)) - self.model.save(self.config.weight_file) + self.model.fit(x_train, y_train, batch_size=60, epochs=10) + self.model.evaluate(x_test, y_test, verbose=2) + self.model.save(self.config.model_file) if __name__ == '__main__': # 测试加载外界word2vec词向量 - load_embeding() - - # model = EmotionModel(Config) - # while 1: - # text = input("text:") - # res = model.predict(text) - # print(res) - + # vocab_list, word_index, embeddings_matrix = load_embeding() + model = EmotionModel(Config) + status = False + while 1: + text = input("text:") + if text in ["exit", "quit"]: + break + # 首次启动加载jieba词库 + if not status: + model.load_word2jieba() + status = True + res = model.predict(text) + label_dic = {0:"消极的", 1:"中性的", 2:"积极的"} + print(res, " : ",label_dic[np.argmax(res)])