import streamlit as st
import difflib
from docx import Document
import re

def read_file_content(uploaded_file):
    """Return the plain text of an uploaded ``.txt`` or ``.docx`` file.

    The file kind is taken from the upload's MIME type (``.type``). When the
    MIME type is missing or not one of the two recognised values, the file
    name extension (``.name``) is used as a fallback, because the reported
    MIME type is not always reliable.

    Args:
        uploaded_file: Upload object exposing ``type``, ``getvalue()`` and,
            optionally, ``name``. For ``.docx`` input the object itself is
            handed to ``Document``.

    Returns:
        The decoded text for plain-text files, or the paragraph texts of a
        Word document joined by single spaces.

    Raises:
        ValueError: If the file kind is unsupported, a text file is not valid
            UTF-8 (``UnicodeDecodeError`` is a ``ValueError`` subclass), or
            the ``.docx`` file cannot be parsed.
    """
    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    mime_type = getattr(uploaded_file, "type", None) or ""
    file_name = (getattr(uploaded_file, "name", None) or "").lower()

    # Trust a recognised MIME type first; only then fall back to the extension.
    if mime_type == "text/plain":
        kind = "txt"
    elif mime_type == docx_mime:
        kind = "docx"
    elif file_name.endswith(".txt"):
        kind = "txt"
    elif file_name.endswith(".docx"):
        kind = "docx"
    else:
        raise ValueError("Unsupported file type")

    if kind == "txt":
        # "utf-8-sig" decodes plain UTF-8 and also strips a leading BOM, which
        # would otherwise stick to the first sentence and prevent it matching.
        return uploaded_file.getvalue().decode("utf-8-sig")

    try:
        doc = Document(uploaded_file)
    except Exception as exc:
        # Parsing failures surface as assorted non-ValueError exceptions;
        # normalise them to the ValueError contract callers already handle,
        # keeping the original cause chained for debugging.
        raise ValueError("Could not read the .docx file; it may be corrupt") from exc
    return " ".join(paragraph.text for paragraph in doc.paragraphs)

def _split_sentences(text):
    """Split *text* into whitespace-normalised, non-empty sentences."""
    pieces = re.split(r'(?<=[.!?])\s+', text)
    # Collapse runs of whitespace (including hard line wraps) so the same
    # sentence compares equal regardless of layout, then drop the empty
    # fragments re.split produces for empty input or trailing whitespace.
    normalised = (" ".join(piece.split()) for piece in pieces)
    return [sentence for sentence in normalised if sentence]


def calculate_similarity_and_matches(text1, text2):
    """Compare two texts sentence by sentence.

    Each text is split after ``.``, ``!`` or ``?`` followed by whitespace and
    the resulting sentence lists are aligned with ``difflib.SequenceMatcher``.

    Args:
        text1: First document text.
        text2: Second document text.

    Returns:
        A ``(similarity, matches)`` tuple. ``similarity`` is the matcher ratio
        scaled to 0-100. ``matches`` lists each run of consecutive sentences
        of ``text1`` that also appears in ``text2``, joined by single spaces.
        If either text contains no sentences the result is ``(0.0, [])``.
    """
    sentences1 = _split_sentences(text1)
    sentences2 = _split_sentences(text2)

    # With nothing to compare there is no evidence of overlap; without this
    # guard two empty documents would be reported as 100% similar.
    if not sentences1 or not sentences2:
        return 0.0, []

    matcher = difflib.SequenceMatcher(None, sentences1, sentences2)
    similarity = matcher.ratio() * 100

    # get_matching_blocks() ends with a zero-size sentinel; skip it.
    matches = [
        " ".join(sentences1[block.a:block.a + block.size])
        for block in matcher.get_matching_blocks()
        if block.size > 0
    ]

    return similarity, matches

def suggest_changes(matches):
    """Build a rephrasing hint for every match longer than five words.

    Args:
        matches: Iterable of matched text strings.

    Returns:
        A list of ``"Consider rephrasing: '<match>'"`` strings, in input
        order, for the matches whose whitespace-separated word count
        exceeds five.
    """
    min_words_exclusive = 5  # matches at or below this length are ignored
    return [
        f"Consider rephrasing: '{passage}'"
        for passage in matches
        if len(passage.split()) > min_words_exclusive
    ]

def main():
    """Render the Streamlit page: upload two documents and report overlap.

    Once both files are uploaded, their text is extracted, the similarity
    percentage is shown with a verdict (below 20, below 50, or higher), the
    matched passages are listed, and rephrasing suggestions are shown when
    there are any. A ``ValueError`` raised along the way is displayed as an
    error message.
    """
    st.title("Enhanced Document Plagiarism Checker")

    first_upload = st.file_uploader("Upload first document", type=["txt", "docx"])
    second_upload = st.file_uploader("Upload second document", type=["txt", "docx"])

    # Nothing to compare until both uploads are present.
    if first_upload is None or second_upload is None:
        return

    try:
        score, matches = calculate_similarity_and_matches(
            read_file_content(first_upload),
            read_file_content(second_upload),
        )

        st.write(f"Plagiarism percentage: {score:.2f}%")

        # Pick the verdict from the highest band downwards.
        if score >= 50:
            verdict = "The documents have significant similarities and may contain plagiarism."
        elif score >= 20:
            verdict = "There are some similarities between the documents."
        else:
            verdict = "The documents appear to be mostly different."
        st.write(verdict)

        if not matches:
            st.write("No significant matching content found.")
        else:
            st.subheader("Matched Content:")
            for number, passage in enumerate(matches, start=1):
                st.write(f"{number}. {passage}")

            hints = suggest_changes(matches)
            if hints:
                st.subheader("Suggestions for Changes:")
                for number, hint in enumerate(hints, start=1):
                    st.write(f"{number}. {hint}")

    except ValueError as err:
        st.error(f"Error: {str(err)}")

# Build the page only when this file is executed as the main module,
# not when it is imported.
if __name__ == "__main__":
    main()