import logging
import json
import os
import re
from deep_translator import GoogleTranslator
from gematria import calculate_gematria
import math
import csv

# Configure the logger
# You can uncomment the next line to enable debugging logs
# logging.basicConfig(level=logging.DEBUG, format='%(levelname)s:%(message)s')
logger = logging.getLogger(__name__)

# Column headers of the tab-separated source file that this module reads.
_BOOK_COLUMN = '〔Book|Chapter|Verse〕'
_GREEK_COLUMN = '〔OGNTk|OGNTu|OGNTa|lexeme|rmac|sn〕'

# Patterns used while cleaning a book's text; compiled once instead of per book.
_BRACKETED_RE = re.compile(r"\[.*?\]|\{.*?\}|\<.*?\>", flags=re.DOTALL)
_NON_GREEK_RE = re.compile(r"[^\u0370-\u03FF\u1F00-\u1FFF ]+")
_WHITESPACE_RE = re.compile(r'\s+')

# Dictionary for the 27 books of the New Testament (English names)
_NT_BOOKS = {
    40: "Matthew",
    41: "Mark",
    42: "Luke",
    43: "John",
    44: "Acts",
    45: "Romans",
    46: "1 Corinthians",
    47: "2 Corinthians",
    48: "Galatians",
    49: "Ephesians",
    50: "Philippians",
    51: "Colossians",
    52: "1 Thessalonians",
    53: "2 Thessalonians",
    54: "1 Timothy",
    55: "2 Timothy",
    56: "Titus",
    57: "Philemon",
    58: "Hebrews",
    59: "James",
    60: "1 Peter",
    61: "2 Peter",
    62: "1 John",
    63: "2 John",
    64: "3 John",
    65: "Jude",
    66: "Revelation",
}


def _required_field(row, column):
    """Return row[column], raising ValueError when the field is absent from a short row."""
    value = row[column]
    if value is None:
        # csv.DictReader fills fields missing from a short row with None.
        raise ValueError(f"missing field {column}")
    return value


def _load_book_texts(file_name, start, end):
    """Read the tab-separated file and return {book number: space-joined words}.

    Only rows whose book number lies in [start, end] are kept. Rows that cannot
    be parsed are logged and skipped. Words of a book are accumulated even when
    its rows are not contiguous in the file.
    """
    book_words = {}
    with open(file_name, 'r', encoding='utf-8') as file:
        reader = csv.DictReader(file, delimiter='\t')
        for row_num, row in enumerate(reader, start=1):
            try:
                # The book field looks like '〔40|1|1〕'; keep the number before the first '|'.
                book_field = _required_field(row, _BOOK_COLUMN)
                book = int(book_field.split('|')[0].lstrip('〔'))

                if book < start or book > end:
                    continue

                words = book_words.setdefault(book, [])

                # Keep the first '|'-separated part of the Greek field,
                # taken after the opening '〔' when one is present.
                greek_field = _required_field(row, _GREEK_COLUMN)
                if '〔' in greek_field:
                    greek_field = greek_field.split('〔')[1]
                words.append(greek_field.split('|')[0])
            except (KeyError, IndexError, ValueError) as e:
                logger.error(f"Error parsing row {row_num}: {e}")
                continue  # Skip this row and continue

    return {book: " ".join(words) for book, words in book_words.items()}


def _clean_text(full_text, strip_spaces, strip_in_braces, strip_diacritics):
    """Apply the optional stripping steps and collapse whitespace in a book's text."""
    clean_text = full_text
    if strip_in_braces:
        clean_text = _BRACKETED_RE.sub("", clean_text)
    if strip_diacritics:
        # Drops every character outside U+0370-U+03FF / U+1F00-U+1FFF except spaces.
        clean_text = _NON_GREEK_RE.sub("", clean_text)
    # Collapsing every whitespace run (newlines included) to a single space.
    clean_text = _WHITESPACE_RE.sub(" ", clean_text).strip()
    if strip_spaces:
        clean_text = clean_text.replace(" ", "")
    return clean_text


def _select_characters(clean_text, step, r):
    """Return the characters picked from clean_text for a single round value r.

    abs(r) is split into a number of full passes and a fractional remainder.
    Every full pass picks len(clean_text) // step characters, advancing `step`
    positions (wrapping around) per character; only the last full pass is kept.
    The remainder then picks floor(chars_per_pass * remainder) more characters.
    Positive r starts at index step - 1 and walks forward; otherwise the walk
    starts at index len(clean_text) - step and goes backward.
    """
    text_length = len(clean_text)
    abs_r = abs(r)
    full_passes = math.floor(abs_r)
    remainder = abs_r - full_passes

    base_chars = text_length // step
    if base_chars == 0:
        # step > text_length: one character per full pass when rounds > 1, otherwise none.
        chars_per_full_pass = 1 if abs_r > 1 else 0
        chars_for_remainder = 0
    else:
        chars_per_full_pass = base_chars
        chars_for_remainder = math.floor(base_chars * remainder)
    logger.debug(
        "Round %s: chars_per_full_pass = %s, chars_for_remainder = %s",
        r, chars_per_full_pass, chars_for_remainder,
    )

    if r > 0:
        current_index = (step - 1) % text_length
        stride = step
    else:
        current_index = (text_length - step) % text_length
        stride = -step

    def take(count):
        """Pick `count` characters from the current position, advancing the walk."""
        nonlocal current_index
        picked = []
        for _ in range(count):
            picked.append(clean_text[current_index])
            current_index = (current_index + stride) % text_length
        return "".join(picked)

    # Every full pass advances the walk, but only the last one is kept.
    pass_result = ""
    for _ in range(full_passes):
        pass_result = take(chars_per_full_pass)

    # Remainder pass for fractional rounds
    if remainder > 0 and chars_for_remainder > 0:
        pass_result += take(chars_for_remainder)

    return pass_result


def process_json_files(start=40, end=66, step=1, rounds="1", length=0, tlang="en", strip_spaces=True,
                       strip_in_braces=True, strip_diacritics=True, translate=False):
    """
    Process a CSV file containing biblical texts and perform various text manipulations.

    The tab-separated file "texts/bible/OpenGNT_version3_3.csv" is read, the
    words of every book in [start, end] are joined and cleaned, and for each
    round value characters are picked every `step` positions. The picks of all
    rounds are concatenated per book.

    Parameters:
    - start (int): Starting book number.
    - end (int): Ending book number.
    - step (int): Step value for character selection. A step of 0 ends up as an
      "unexpected error" entry (division by zero).
    - rounds (str): Comma-separated string of round values (can include floats).
      Negative values walk the text backwards. A single number is accepted too.
    - length (int): Maximum length of the result text; 0 means no limit.
    - tlang (str): Target language for translation.
    - strip_spaces (bool): Whether to remove spaces from the text.
    - strip_in_braces (bool): Whether to remove text within [], {} and <>.
    - strip_diacritics (bool): Whether to remove every character that is neither
      a space nor in the ranges U+0370-U+03FF / U+1F00-U+1FFF.
    - translate (bool): Whether to translate the result text.

    Returns:
    - list | None: A list of dictionaries containing processed data (keys 'book',
      'title', 'result_text', 'result_sum', 'translated_text', 'source_language')
      or error messages (key 'error'); None when there is nothing to report.
    """
    file_name = "texts/bible/OpenGNT_version3_3.csv"
    translator = GoogleTranslator(source='auto', target=tlang) if translate else None

    # The rounds are the same for every book, so validate them once up front.
    try:
        rounds_list = [float(value) for value in str(rounds).split(',')]  # Allow floats
    except ValueError as e:
        logger.error(f"Invalid rounds parameter: {e}")
        return [{"error": f"Invalid rounds parameter: {e}"}]

    results = []
    try:
        book_texts = _load_book_texts(file_name, start, end)

        for book, full_text in book_texts.items():
            logger.debug(f"Processing book {book}")
            clean_text = _clean_text(full_text, strip_spaces, strip_in_braces, strip_diacritics)

            text_length = len(clean_text)
            logger.debug(f"Clean text for book {book}: Length = {text_length}")

            if text_length == 0:
                logger.warning(f"No text available for book {book} after cleaning.")
                continue  # Skip processing if there's no text

            result_text = "".join(_select_characters(clean_text, step, r) for r in rounds_list)
            logger.debug(f"Result text for book {book}: {result_text}")

            if length != 0:
                result_text = result_text[:length]
                logger.debug(f"Book {book}: Result text truncated to length {length}.")

            if not result_text:
                continue  # Nothing was selected for this book, so nothing to report

            # Translate the result text if required
            try:
                translated_text = translator.translate(result_text) if translator else ""
            except Exception as e:
                logger.error(f"Book {book}: Translation error: {e}")
                translated_text = ""

            # Calculate the Gematria sum
            try:
                result_sum = calculate_gematria(result_text)
            except Exception as e:
                logger.error(f"Book {book}: Gematria calculation error: {e}")
                result_sum = None

            results.append({
                'book': f"Bible {book}.",
                'title': _NT_BOOKS.get(book, "Unknown Book"),
                'result_text': result_text,
                'result_sum': result_sum,
                'translated_text': translated_text,
                'source_language': 'el',
            })

    except FileNotFoundError:
        logger.error(f"File {file_name} not found.")
        results.append({"error": f"File {file_name} not found."})
    except Exception as e:
        # Top-level boundary: report the failure as an entry instead of raising.
        logger.error(f"An unexpected error occurred: {e}")
        results.append({"error": f"An unexpected error occurred: {e}"})

    return results if results else None


# Tests
# NOTE: these checks run when the module is imported and read the source file
# through process_json_files. Each entry pairs a call result with the expected
# 'result_text' of its first entry (None means "no results expected").
test_results = [
    (process_json_files(40, 40, 386, rounds="1,0.5,-1,-0.5"), "τωιεοννναυοοαμπυρααυοιξοηαϲοιιωομκνοοουομρυοιεχοοδεαλαννοτοκϲααυϲϲτεαδαϲαιεευαιηαεηαμαλκμαιακιγνμυνετνυυθθεγιεδρεαοαντηοκεοατϲνπναολαεοοφεηεϲωμκουμερρυοϲαοοαϲλτιηιωδυνϲυτυιχοονεαηωντολθβοτεαιαυοηετιτεαννυεινϲεενωκωξρυρηρνϲξεαγεαϲατωιεοννναυοοαμπυρααυοιξοηαϲοιιωομκνοοουομρυοιεχοοδεαλαννοτοκϲααυϲϲτεαδαϲαιεευαιηαεηαμαλκμαιακιγνμυνετνυυθθεγιεδρεαοϲτοαϲωθειιυνδνδξλονυταολαϲαττμτννοερτεοροανιεκτεεαιιϲωεαμϲωικμτποκϲϲιορϲπμοκαιουτωτδοωαξεαγωεφτωυπμαλυττταεομττλυαεπατονεαξτομυχαωηνυοβωπτυυκξαπιτααπυενεροοτϲαααυηοανηλταιθεαντινοιλκιβπγοδαιοηωαδαετακυϲοηυουαυνωαεαοττυαεεωτανεκβγεϲτοαϲωθειιυνδνδξλονυταολαϲαττμτννοερτεοροανιεκτεεαιιϲωεαμϲωικμτποκϲϲιορϲπμοκαιουτωτδοωαξεαγωεφτωυπμαλυττταεομττλυαε"),
    (process_json_files(40, 40, 386, rounds="1,-1"), "τωιεοννναυοοαμπυρααυοιξοηαϲοιιωομκνοοουομρυοιεχοοδεαλαννοτοκϲααυϲϲτεαδαϲαιεευαιηαεηαμαλκμαιακιγνμυνετνυυθθεγιεδρεαοαντηοκεοατϲνπναολαεοοφεηεϲωμκουμερρυοϲαοοαϲλτιηιωδυνϲυτυιχοονεαηωντολθβοτεαιαυοηετιτεαννυεινϲεενωκωξρυρηρνϲξεαγεαϲαϲτοαϲωθειιυνδνδξλονυταολαϲαττμτννοερτεοροανιεκτεεαιιϲωεαμϲωικμτποκϲϲιορϲπμοκαιουτωτδοωαξεαγωεφτωυπμαλυττταεομττλυαεπατονεαξτομυχαωηνυοβωπτυυκξαπιτααπυενεροοτϲαααυηοανηλταιθεαντινοιλκιβπγοδαιοηωαδαετακυϲοηυουαυνωαεαοττυαεεωτανεκβγε"),
    #(process_json_files(1, 1, 21, rounds="3", length=0), ""),
    #(process_json_files(1, 1, 22, rounds="1", length=0), ""),
    #(process_json_files(1, 1, 22, rounds="3", length=0), ""),
    #(process_json_files(1, 1, 23, rounds="3", length=0), ""),
    #(process_json_files(1, 1, 11, rounds="1", length=0), ""),
    #(process_json_files(1, 1, 2, rounds="1", length=0), ""),
    #(process_json_files(1, 1, 23, rounds="1", length=0), None),  # Expect None, when no results
    #(process_json_files(1, 1, 23, rounds="-1", length=0), None),  # Expect None, when no results
    #(process_json_files(1, 1, 22, rounds="-1", length=0), ""),
    #(process_json_files(1, 1, 22, rounds="-2", length=0), ""),
    #(process_json_files(1, 1, 1, rounds="-1", length=0), ""),  # Reversed Hebrew alphabet
    #(process_json_files(1, 1, 1, rounds="1,-1", length=0), ""),  # Combined rounds
    #(process_json_files(1, 1, 22, rounds="1,-1", length=0, average_compile=True), ""),  # average compile test (400+1) / 2 = math.ceil(200.5)=201=200+1="רא"
]

all_tests_passed = True
for result, expected in test_results:
    if expected is None:  # Check if no result is expected
        if not result:
            logger.warning("Test passed: Expected no results, got no results.")
        else:
            logger.error(f"Test failed: Expected no results, but got: {result}")
            all_tests_passed = False
    elif not result:
        logger.error(f"Test failed: Expected '{expected}', but got no results")
        all_tests_passed = False
    elif 'result_text' not in result[0]:
        # An error entry (e.g. missing source file) has no 'result_text'; report it
        # as a failed test instead of raising KeyError during import.
        logger.error(f"Test failed: Expected '{expected}', but got: {result[0]}")
        all_tests_passed = False
    else:
        result_text = result[0]['result_text']
        if result_text == expected:
            logger.warning(f"Test passed: Expected '{expected}', got '{result_text}'")
        else:
            logger.error(f"Test failed: Expected '{expected}', but got '{result_text}'")
            all_tests_passed = False

if all_tests_passed:
    logger.info("All round tests passed.")