"""Streamlit app that compares two uploaded documents sentence by sentence."""

import difflib
import re

import streamlit as st
from docx import Document

# MIME type strings that ``read_file_content`` knows how to handle.
_TEXT_MIME = "text/plain"
_DOCX_MIME = (
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
)

# A sentence boundary is a run of whitespace that follows '.', '!' or '?'.
_SENTENCE_BOUNDARY = re.compile(r"(?<=[.!?])\s+")


def read_file_content(uploaded_file):
    """Return the text content of an uploaded ``.txt`` or ``.docx`` file.

    Args:
        uploaded_file: Object exposing a ``type`` attribute (MIME type
            string). Plain-text uploads must also provide ``getvalue()``
            returning bytes; Word uploads are passed to ``docx.Document``.

    Returns:
        The UTF-8 decoded text for plain-text files, or the paragraph texts
        joined with single spaces for Word documents.

    Raises:
        ValueError: If the MIME type is not one of the two supported types.
            A UTF-8 decoding failure raises ``UnicodeDecodeError``, which is
            itself a ``ValueError`` subclass.
    """
    if uploaded_file.type == _TEXT_MIME:
        return uploaded_file.getvalue().decode("utf-8")
    if uploaded_file.type == _DOCX_MIME:
        doc = Document(uploaded_file)
        return " ".join(paragraph.text for paragraph in doc.paragraphs)
    raise ValueError("Unsupported file type")


def _split_sentences(text):
    """Split *text* into stripped, non-empty sentences."""
    # Dropping blank fragments matters: splitting "" yields [""], which would
    # otherwise make two empty documents compare as identical.
    return [
        sentence.strip()
        for sentence in _SENTENCE_BOUNDARY.split(text)
        if sentence.strip()
    ]


def calculate_similarity_and_matches(text1, text2):
    """Compare two texts sentence by sentence.

    Args:
        text1: First document's text.
        text2: Second document's text.

    Returns:
        A ``(similarity, matches)`` tuple. ``similarity`` is the
        ``difflib.SequenceMatcher`` ratio over the two sentence lists, scaled
        to 0-100. ``matches`` is a list of strings, one per matching block,
        each being the matched sentences of *text1* joined with spaces.
        If either text contains no sentences the result is ``(0.0, [])``.
    """
    sentences1 = _split_sentences(text1)
    sentences2 = _split_sentences(text2)

    # Nothing to compare; without this guard two empty inputs would score
    # 100% because SequenceMatcher treats two empty sequences as identical.
    if not sentences1 or not sentences2:
        return 0.0, []

    # autojunk=False: the default heuristic discards frequently repeated
    # items once a sequence reaches 200 elements, which would hide repeated
    # sentences in long documents.
    matcher = difflib.SequenceMatcher(
        None, sentences1, sentences2, autojunk=False
    )
    similarity = matcher.ratio() * 100

    # get_matching_blocks() always ends with a zero-size sentinel block.
    matches = [
        " ".join(sentences1[block.a:block.a + block.size])
        for block in matcher.get_matching_blocks()
        if block.size > 0
    ]
    return similarity, matches


def suggest_changes(matches, min_words=5):
    """Build rephrasing suggestions for the longer matched passages.

    Args:
        matches: Iterable of matched text strings.
        min_words: A suggestion is produced only for matches with strictly
            more whitespace-separated words than this value.

    Returns:
        A list of suggestion strings, in the order of *matches*.
    """
    return [
        f"Consider rephrasing: '{match}'"
        for match in matches
        if len(match.split()) > min_words
    ]


def main():
    """Render the app: upload two documents and report their similarity."""
    st.title("Enhanced Document Plagiarism Checker")

    doc1 = st.file_uploader("Upload first document", type=["txt", "docx"])
    doc2 = st.file_uploader("Upload second document", type=["txt", "docx"])

    # Wait until both files have been provided.
    if doc1 is None or doc2 is None:
        return

    try:
        text1 = read_file_content(doc1)
        text2 = read_file_content(doc2)

        plagiarism_percentage, matches = calculate_similarity_and_matches(
            text1, text2
        )

        st.write(f"Plagiarism percentage: {plagiarism_percentage:.2f}%")

        if plagiarism_percentage < 20:
            st.write("The documents appear to be mostly different.")
        elif plagiarism_percentage < 50:
            st.write("There are some similarities between the documents.")
        else:
            st.write(
                "The documents have significant similarities and may "
                "contain plagiarism."
            )

        if matches:
            st.subheader("Matched Content:")
            for i, match in enumerate(matches, 1):
                st.write(f"{i}. {match}")

            suggestions = suggest_changes(matches)
            if suggestions:
                st.subheader("Suggestions for Changes:")
                for i, suggestion in enumerate(suggestions, 1):
                    st.write(f"{i}. {suggestion}")
        else:
            st.write("No significant matching content found.")
    except ValueError as e:
        # Covers unsupported file types and UTF-8 decoding failures.
        st.error(f"Error: {e}")


if __name__ == "__main__":
    main()