File size: 2,541 Bytes
231934b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import gradio as gr
import re

def parse_references(references):
    """Parse a newline-separated reference list into author/year records.

    Each non-blank line is treated as one reference. Text wrapped in
    ``<i>...</i>`` or in straight double quotes is removed first, so that
    digits inside such a span cannot be mistaken for the year. The author is
    the first word of the remaining text and the year is the first
    standalone 4-digit number.

    A line starting with a dash (``-``, ``--``, or an en/em dash) is a
    "same author as above" entry: it inherits the author of the most recent
    non-dash line, and its leading dashes are removed from the stored text.

    Args:
        references: The reference list, one reference per line.

    Returns:
        A list of dicts with the keys ``'author'``, ``'year'`` and
        ``'full_ref'`` (the line with italic/quoted spans and leading dashes
        removed, stripped of surrounding whitespace). Lines for which no
        author or no year can be determined are omitted.
    """
    # Hyphen, en dash and em dash: all are used to mark a repeated author.
    repeat_marks = '-\u2013\u2014'
    parsed_refs = []
    prev_author = None

    for line in references.split('\n'):
        # Blank separator lines carry no author; skipping them keeps
        # prev_author intact for a dash entry that follows the gap.
        if not line.strip():
            continue

        # Remove text in italics or quotation marks
        ref = re.sub(r'<i>.*?</i>|".*?"', '', line).strip()

        if ref.startswith(tuple(repeat_marks)):
            author = prev_author
            ref = ref.lstrip(repeat_marks + ' ')  # Drop the dash marker
        else:
            # Find author (first word) and remember it for dash entries
            author_match = re.search(r'\b\w+', ref)
            author = author_match.group() if author_match else None
            prev_author = author

        # Find year (first 4-digit number)
        year_match = re.search(r'\b\d{4}\b', ref)

        if author and year_match:
            parsed_refs.append({
                'author': author,
                'year': year_match.group(),
                'full_ref': ref.strip(),
            })
    return parsed_refs

def find_citations(text, parsed_refs):
    """Collect the in-text citations that correspond to each reference.

    The text is split into paragraphs on blank lines. Within each paragraph,
    a citation is the shortest span that starts with the reference's author
    and ends with its year, matched case-insensitively. Because ``.`` does
    not match a newline, a citation never spans a line break.

    Args:
        text: The body text to search.
        parsed_refs: Dicts with the keys ``'author'``, ``'year'`` and
            ``'full_ref'``, as produced by ``parse_references``.

    Returns:
        A dict mapping each reference's ``'full_ref'`` to the list of matched
        citation strings, in order of appearance. References without matches
        map to an empty list. If two references share the same ``'full_ref'``
        only the matches of the later one are kept.
    """
    paragraphs = text.split('\n\n')
    citations = {}

    for ref in parsed_refs:
        author = re.escape(ref['author'])
        year = re.escape(ref['year'])
        # Anchor both ends so the author is not found inside a longer word
        # ("Lee" in "sleeve") and the year not inside a longer number
        # ("2020" in "12020"). Lookarounds are used rather than \b so the
        # anchors also work when the author does not end in a word character.
        pattern = re.compile(
            rf'(?<!\w){author}(?!\w).*?(?<!\d){year}(?!\d)',
            re.IGNORECASE,
        )

        matches = []
        for paragraph in paragraphs:
            matches.extend(pattern.findall(paragraph))
        citations[ref['full_ref']] = matches

    return citations

def process_text_and_references(text, references):
    """Build a plain-text report of the in-text citations for each reference.

    The reference list is parsed with ``parse_references`` and the text is
    searched with ``find_citations``. For every reference the report holds a
    ``Reference:`` line, then either a bulleted list of the citations found
    or a note that none were found, then an empty line.

    Args:
        text: The body text to search for citations.
        references: The reference list, one reference per line.

    Returns:
        The report as a single newline-joined string.
    """
    citations_by_ref = find_citations(text, parse_references(references))

    report_lines = []
    for reference, found in citations_by_ref.items():
        report_lines.append(f"Reference: {reference}")
        if not found:
            report_lines.append("No in-text citations found.")
        else:
            report_lines.append("In-text citations:")
            report_lines.extend(f"- {hit}" for hit in found)
        # Empty entry so consecutive references are separated by a blank line
        report_lines.append("")

    return "\n".join(report_lines)

# Create the Gradio interface: two multi-line text inputs (the body text and
# the reference list) are passed, in that order, to
# process_text_and_references, and its returned report string is shown in a
# single output textbox.
iface = gr.Interface(
    fn=process_text_and_references,
    inputs=[
        gr.Textbox(label="Text", lines=10),
        gr.Textbox(label="References", lines=10)
    ],
    outputs=gr.Textbox(label="References and In-text Citations"),
    title="In-text Citation Finder",
    description="Enter your text and references to find in-text citations for each reference."
)

# Launch the interface only when this file is executed as a script, so that
# importing the module does not call launch().
if __name__ == "__main__":
    iface.launch()