yassiracharki committed on
Commit
a4656ff
1 Parent(s): 50d91ab

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +100 -3
README.md CHANGED
@@ -1,3 +1,100 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ metrics:
4
+ - accuracy
5
+ pipeline_tag: text-classification
6
+ tags:
7
+ - cnn
8
+ - amazon_reviews
9
+ ---
10
+ # Model Card for Pre-Trained Binary CNN Sentiment Model (Amazon Reviews)
11
+
12
+ # Downloads
13
+ !pip install contractions
14
+ !pip install textsearch
15
+ !pip install tqdm
16
+
17
+ import nltk
18
+ nltk.download('punkt')
19
+
20
+ # Fundamental classes
21
+ import tensorflow as tf
22
+ from tensorflow import keras
23
+ import pandas as pd
24
+ import numpy as np
25
+
26
+ # Time
27
+ import time
28
+ import datetime
29
+
30
+ # Preprocessing
31
+ from tensorflow.keras.preprocessing.text import Tokenizer
32
+ from tensorflow.keras.preprocessing import sequence
33
+ from sklearn.preprocessing import LabelEncoder
34
+ import contractions
35
+ from bs4 import BeautifulSoup
36
+ import re
37
+ import tqdm
38
+ import unicodedata
39
+
40
+ seed = 3541
41
+ np.random.seed(seed)
42
+
43
+ # Placeholder loss (mean of y_pred - y_true) intended to bypass an error during model loading. NOTE(review): nothing in this snippet passes it to load_model, which uses compile=False instead - confirm it is still needed
44
+ def dummy_loss(y_true, y_pred):
45
+ return tf.reduce_mean(y_pred - y_true)
46
+
47
+ # Load the CNN trained on Amazon reviews; compile=False loads the model without compiling it, so it is compiled explicitly below
48
+ modelAmazon = keras.models.load_model(
49
+ '/kaggle/input/pre-trained-model-binary-cnn-nlp-amazon-reviews/tensorflow1/pre_trained_sentiment_analysis_cnn_model_amazon_reviews/1/Binary_Classification_86_Amazon_Reviews_CNN.h5',
50
+ compile=False
51
+ )
52
+
53
+ # Compile explicitly with binary cross-entropy (SUM_OVER_BATCH_SIZE reduction), the Adam optimizer and an accuracy metric
54
+ modelAmazon.compile(
55
+ optimizer='adam',
56
+ loss=keras.losses.BinaryCrossentropy(reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE),
57
+ metrics=['accuracy']
58
+ )
59
+
60
+ # Loading Amazon test data
61
+ dataset_test_Amazon = pd.read_csv('/kaggle/input/amazon-reviews-for-sa-binary-negative-positive-csv/amazon_review_sa_binary_csv/test.csv')
62
+
63
+ # Loading Amazon train data (to be used on the label encoder)
64
+ dataset_train_Amazon = pd.read_csv('/kaggle/input/amazon-reviews-for-sa-binary-negative-positive-csv/amazon_review_sa_binary_csv/train.csv')
65
+
66
+ # Shuffling the Test Data
67
+ test_Amazon = dataset_test_Amazon.sample(frac=1)
68
+ train_Amazon = dataset_train_Amazon.sample(frac=1)
69
+
70
+ # Taking a tiny portion of the database (because it will only be used on the label encoder)
71
+ train_Amazon = dataset_train_Amazon.iloc[:100, :]
72
+
73
+ # Taking only necessary columns
74
+ y_test_Amazon = test_Amazon['class_index'].values
75
+ X_train_Amazon = train_Amazon['review_text'].values
76
+ y_train_Amazon = train_Amazon['class_index'].values
77
+
78
+ # Preprocess corpus function
79
+ def pre_process_corpus(corpus):
80
+ processed_corpus = []
81
+ for doc in tqdm.tqdm(corpus):
82
+ doc = contractions.fix(doc)
83
+ doc = BeautifulSoup(doc, "html.parser").get_text()
84
+ doc = unicodedata.normalize('NFKD', doc).encode('ascii', 'ignore').decode('utf-8', 'ignore')
85
+ doc = re.sub(r'[^a-zA-Z\s]', '', doc, re.I|re.A)
86
+ doc = doc.lower()
87
+ doc = doc.strip()
88
+ processed_corpus.append(doc)
89
+ return processed_corpus
90
+
91
+ # Preprocessing the Data
92
+ X_test_Amazon = pre_process_corpus(test_Amazon['review_text'].values)
93
+ X_train_Amazon = pre_process_corpus(X_train_Amazon)
94
+
95
+ # Creating and Fitting the Tokenizer
96
+ etc ...
97
+
98
+ More information is available on the model's Kaggle page:
99
+
100
+ https://www.kaggle.com/models/yacharki/pre-trained-model-binary-cnn-nlp-amazon-reviews