yassiracharki
committed on
Commit
•
a4656ff
1
Parent(s):
50d91ab
Update README.md
Browse files
README.md
CHANGED
@@ -1,3 +1,100 @@
|
|
1 |
-
---
|
2 |
-
license: apache-2.0
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: apache-2.0
|
3 |
+
metrics:
|
4 |
+
- accuracy
|
5 |
+
pipeline_tag: text-classification
|
6 |
+
tags:
|
7 |
+
- cnn
|
8 |
+
- amazon_reviews
|
9 |
+
---
|
10 |
+
# Model Card for Binary CNN Sentiment Classifier (Amazon Reviews)
|
11 |
+
|
12 |
+
# Downloads
|
13 |
+
!pip install contractions
|
14 |
+
!pip install textsearch
|
15 |
+
!pip install tqdm
|
16 |
+
|
17 |
+
import nltk
|
18 |
+
nltk.download('punkt')
|
19 |
+
|
20 |
+
# Fundamental classes
|
21 |
+
import tensorflow as tf
|
22 |
+
from tensorflow import keras
|
23 |
+
import pandas as pd
|
24 |
+
import numpy as np
|
25 |
+
|
26 |
+
# Time
|
27 |
+
import time
|
28 |
+
import datetime
|
29 |
+
|
30 |
+
# Preprocessing
|
31 |
+
from tensorflow.keras.preprocessing.text import Tokenizer
|
32 |
+
from tensorflow.keras.preprocessing import sequence
|
33 |
+
from sklearn.preprocessing import LabelEncoder
|
34 |
+
import contractions
|
35 |
+
from bs4 import BeautifulSoup
|
36 |
+
import re
|
37 |
+
import tqdm
|
38 |
+
import unicodedata
|
39 |
+
|
40 |
+
seed = 3541
|
41 |
+
np.random.seed(seed)
|
42 |
+
|
43 |
+
# Define a dummy loss to bypass the error during model loading
|
44 |
+
def dummy_loss(y_true, y_pred):
    """Return the mean of the element-wise difference ``y_pred - y_true``.

    Placeholder loss: it only reduces the signed difference between the
    predictions and the targets to a single value with ``tf.reduce_mean``.
    """
    signed_difference = y_pred - y_true
    return tf.reduce_mean(signed_difference)
|
46 |
+
|
47 |
+
# Loading the model Trained on Amazon reviews
|
48 |
+
modelAmazon = keras.models.load_model(
|
49 |
+
'/kaggle/input/pre-trained-model-binary-cnn-nlp-amazon-reviews/tensorflow1/pre_trained_sentiment_analysis_cnn_model_amazon_reviews/1/Binary_Classification_86_Amazon_Reviews_CNN.h5',
|
50 |
+
compile=False
|
51 |
+
)
|
52 |
+
|
53 |
+
# Compile the model with the correct loss function and reduction
|
54 |
+
modelAmazon.compile(
|
55 |
+
optimizer='adam',
|
56 |
+
loss=keras.losses.BinaryCrossentropy(reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE),
|
57 |
+
metrics=['accuracy']
|
58 |
+
)
|
59 |
+
|
60 |
+
# Loading Amazon test data
|
61 |
+
dataset_test_Amazon = pd.read_csv('/kaggle/input/amazon-reviews-for-sa-binary-negative-positive-csv/amazon_review_sa_binary_csv/test.csv')
|
62 |
+
|
63 |
+
# Loading Amazon train data (to be used on the label encoder)
|
64 |
+
dataset_train_Amazon = pd.read_csv('/kaggle/input/amazon-reviews-for-sa-binary-negative-positive-csv/amazon_review_sa_binary_csv/train.csv')
|
65 |
+
|
66 |
+
# Shuffling the test and train data
|
67 |
+
test_Amazon = dataset_test_Amazon.sample(frac=1)
|
68 |
+
train_Amazon = dataset_train_Amazon.sample(frac=1)
|
69 |
+
|
70 |
+
# Taking a tiny portion of the dataset (it is only used to fit the label encoder)
|
71 |
+
# Slice the already-shuffled frame rather than the raw dataset, so the first
# 100 rows are a random sample and the shuffle above is not thrown away.
train_Amazon = train_Amazon.iloc[:100, :]
|
72 |
+
|
73 |
+
# Taking only necessary columns
|
74 |
+
y_test_Amazon = test_Amazon['class_index'].values
|
75 |
+
X_train_Amazon = train_Amazon['review_text'].values
|
76 |
+
y_train_Amazon = train_Amazon['class_index'].values
|
77 |
+
|
78 |
+
# Preprocess corpus function
|
79 |
+
def pre_process_corpus(corpus):
    """Clean every document of *corpus* and return the cleaned documents.

    Each document goes through these steps, in order:
    contractions are expanded with ``contractions.fix``, HTML markup is
    removed with BeautifulSoup's ``html.parser``, the text is NFKD-normalized
    and reduced to ASCII, every character that is neither an ASCII letter nor
    whitespace is deleted, and the result is lower-cased and stripped.

    Args:
        corpus: iterable of document strings.

    Returns:
        list[str]: the cleaned documents, in the same order as the input.
    """
    # Compiled once, outside the loop. The flags must be given to the pattern:
    # passing them as the 4th positional argument of ``re.sub`` sets ``count``
    # instead, which silently caps the number of replacements per document.
    non_letter_pattern = re.compile(r'[^a-zA-Z\s]', flags=re.I | re.A)

    processed_corpus = []
    for doc in tqdm.tqdm(corpus):
        doc = contractions.fix(doc)
        doc = BeautifulSoup(doc, "html.parser").get_text()
        # Drop accents and any other non-ASCII characters.
        doc = unicodedata.normalize('NFKD', doc).encode('ascii', 'ignore').decode('utf-8', 'ignore')
        doc = non_letter_pattern.sub('', doc)
        doc = doc.lower()
        doc = doc.strip()
        processed_corpus.append(doc)
    return processed_corpus
|
90 |
+
|
91 |
+
# Preprocessing the Data
|
92 |
+
X_test_Amazon = pre_process_corpus(test_Amazon['review_text'].values)
|
93 |
+
X_train_Amazon = pre_process_corpus(X_train_Amazon)
|
94 |
+
|
95 |
+
# Creating and Fitting the Tokenizer
|
96 |
+
etc ...
|
97 |
+
|
98 |
+
More information is available on the model's Kaggle page:
|
99 |
+
|
100 |
+
https://www.kaggle.com/models/yacharki/pre-trained-model-binary-cnn-nlp-amazon-reviews
|