# -*- coding: utf-8 -*-
"""08 - BagOfWords.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/16K9eNawK7Oli4ZnUm0r1nLcTiWRuTYW_
"""

# NOTE: In the notebook this cell started with the IPython magic
# ``%%writefile 08-BagOfWords.py`` and was followed by the shell command
# ``!python /content/08-BagOfWords.py``. Neither is valid Python, so they are
# kept only as this note; the cell body itself is restored as real code below.

import csv
from collections import Counter

# Default file names used by the notebook pipeline.
DEFAULT_UNIQUE_WORDS_PATH = "05 - unique words.txt"
DEFAULT_OUTPUT_PATH = "08 - BagOfWords.csv"


class BagOfWords:
    """Build a Bag of Words (BoW) count matrix and store it as a CSV file."""

    def transform(
        self,
        processed_data,
        *,
        unique_words_path: str = DEFAULT_UNIQUE_WORDS_PATH,
        output_path: str = DEFAULT_OUTPUT_PATH,
    ) -> None:
        """Create a Bag of Words (BoW) representation of the data.

        Steps:
            1. Read the vocabulary (one word per line) from
               ``unique_words_path``.
            2. Write the vocabulary as the header row of ``output_path``.
            3. For every sentence in ``processed_data`` write one row that
               holds, for each vocabulary word, how often it occurs in that
               sentence (0 when absent).

        Args:
            processed_data: Iterable of sentences, each sentence being an
                iterable of hashable tokens (typically a list of strings).
                It is consumed once, so a generator is acceptable.
            unique_words_path: Text file with one vocabulary word per line.
                Surrounding whitespace of each line is stripped.
            output_path: Destination CSV file; overwritten if it exists.

        Returns:
            None. The result is written to ``output_path`` and progress is
            printed to standard output.

        Raises:
            OSError: If the vocabulary file cannot be read or the output
                file cannot be written.
        """
        # Step 1: read the vocabulary. Every line is kept (after stripping),
        # so the column order of the CSV mirrors the line order of the file.
        with open(unique_words_path, "r", encoding="utf-8") as in_file:
            unique_words = [line.strip() for line in in_file]

        print(f"Unique words: {len(unique_words)}")

        # newline='' is what the csv module requires so that it can control
        # line endings itself (avoids blank lines between rows on Windows).
        with open(output_path, mode="w", newline="", encoding="utf-8") as out_file:
            writer = csv.writer(out_file)

            # Step 2: header row = vocabulary.
            writer.writerow(unique_words)

            # Step 3: one row of counts per sentence. enumerate() supplies the
            # running position directly; looking it up with list.index() would
            # rescan the data for every row and report the wrong number
            # whenever two sentences are equal.
            for position, sentence in enumerate(processed_data, start=1):
                word_count = Counter(sentence)
                # Counter returns 0 for words that do not occur.
                writer.writerow([word_count[word] for word in unique_words])

                print(f"Processed sentence {position}")