import gradio as gr
import aiimsscrapper
import amsscrapper
import degruyterscrapper
import sciencedirect
import sciencedirect_admaths
import springerscrapper
from urllib.parse import urlparse
from sheets import ExcelAutomator
from sgoogle import GoogleSheetAutomator
import os
import random
import string
import json
import base64
import crypto

auth = [
    (os.getenv("USERNAME1"), os.getenv("PASSWORD1")),
    (os.getenv("USERNAME2"), os.getenv("PASSWORD2")),
    (os.getenv("USERNAME3"), os.getenv("PASSWORD3"))
]

description = """
For bug reports or improvements, contact [@H4CK3R_5M4CK3R](https://t.me/H4CK3R_5M4CK3R) on Telegram.

**Usage Instructions:**

1. **Single Issue Scraping:**
   - Provide the issue link in the URL section.
   - Optionally, specify the desired output file name.

2. **Multiple Issues Scraping:**
   - Use curly braces `{}` in the URL to indicate where the volume (`v`) and issue (`i`) numbers should be inserted.
   - Define the range for volumes, not issues. Ensure you pass the volume range correctly.

3. **Read this before using google sheet feature**
    - **IMPORTANT** First make a google drive folder and then gave access to `sheettesting@testing-430816.iam.gserviceaccount.com` and `primearchiveofficial@gmail.com` super important.
    - Next make sure to check for make owner because it will transfer full control over to you you can delete or do anything as you like with the file.
    - You can also check for live addition of the data if you open the same folder and check for your output file.
    - You will get the file link in the output 4 you can also access it but only given mail will be able to access it.
    - Even after creating the file dont remove the access from the google drive folder as are gonna add more file if you like in there
    - You can get google drive folder id by just go to the drive.google.com -> create new folder -> click on : -> Share -> Enter the email given above both -> Make sure to gave Editor permission -> Click on Send
    - After this to get the drive folder id click on copy link and then it will look like this https://drive.google.com/drive/folders/folderid?usp=sharing now in this like **folderid** is the folder id.
    - **IMPORTANT** : After everything is done make sure to accept the ownership which you can do by Click on : -> Share -> Accept ownership and congo now you are the sweet owner of the file do as you like to do.

**Note:** 
- The range should be the volume range, not the issue range.
- Some authors may not have a listed address; their profile link will be included in the address section instead.
- After progress is completed make sure to click on clear because sometimes notification does't ring
"""

exmpl = [
        ["https://www.ams.org/journals/jams/2024-37-01/home.html?active=allissues", 0, 0, "example@gmail.com", "asdfasdfasdfasdfasdf", "example1", False, True],
        ["https://www.degruyter.com/journal/key/fca/20/2/html", 0, 0, "example@gmail.com", "asdfasdfasdfasdfasdf", "example2", False, True],
        ["https://www.degruyter.com/journal/key/fca/{v}/{i}/html", 22, 23, "example@gmail.com", "asdfasdfasdfasdfasdf", "example3", False, True],
        ["https://www.aimspress.com/math/article/2024/8/archive-articles", 0, 0, "example@gmail.com", "asdfasdfasdfasdfasdf", "example4", False, True],
        ["https://www.aimspress.com/math/article/{v}/{i}/archive-articles", 2021, 2022, "example@gmail.com", "asdfasdfasdfasdfasdf", "example5", False, True],
        ["https://link.springer.com/journal/208/volumes-and-issues/388-3", 0, 0, "example@gmail.com", "asdfasdfasdfasdfasdf", "example6", False, True],
        ["https://link.springer.com/journal/208/volumes-and-issues/{v}-{i}", 388, 389, "example@gmail.com", "asdfasdfasdfasdfasdf", "example7", False, True],
        ["https://www.sciencedirect.com/journal/advances-in-applied-mathematics/vol/158/suppl/C", 0, 0, "example@gmail.com", "asdfasdfasdfasdfasdf", "example8", False, True],
        ["https://www.sciencedirect.com/journal/acta-mathematica-scientia/vol/38/issue/6", 0, 0, "example@gmail.com", "asdfasdfasdfasdfasdf", "example9", False, True],
        ["https://www.sciencedirect.com/journal/acta-mathematica-scientia/vol/{v}/issue/{i}", 37, 38, "example@gmail.com", "asdfasdfasdfasdfasdf", "example10", False, True]
    ]

stop_work = False

def generate_random_filename(length=8):
    random_string = ''.join(random.choices(string.ascii_letters + string.digits, k=length))
    return random_string

def filterUrlandRun(url: str, from_range: int, to_range: int, reverse: bool, output: str, owner: bool, mail: str, folder_id: str, credit: dict, usegooglesheet: bool):
    if len(output.strip()) < 1:
        output = generate_random_filename()
    if os.path.exists(f"{output}.xlsx"):
        os.remove(f"{output}.xlsx")
    url_sch = urlparse(url)
    domain = url_sch.hostname
    if usegooglesheet:
        shet = GoogleSheetAutomator(
            [
                "Name",
                "Email",
                "Address"
            ],
            folder_id,
            outputfile=output,
            creds_dict=credit
        )
    sht = ExcelAutomator([
        "Name",
        "Email",
        "Address"
    ], output)
    filen = True
    if "{" in url:
        links = []
        if reverse:
            for vol in reversed(range(from_range, to_range+1)):
                links.append(url.format(v=vol, i="{i}"))
        else:
            for vol in range(from_range, to_range+1):
                links.append(url.format(v=vol, i="{i}"))
    else:
        links = [url]
        filen = False
    try:
        if domain == "www.ams.org" or domain == "ams.org":
            for ur in links:
                isu = 1
                while True:
                    if stop_work:
                        break
                    try:
                        if filen:
                            current_url = ur.format(i= (str(isu) if len(str(isu)) > 1 else f"0{isu}"))
                            allLinks = amsscrapper.getlinks(current_url)
                            isu += 1
                        else:
                            current_url = ur
                            allLinks = amsscrapper.getlinks(current_url)
                    except Exception as e:
                        break
                    yield {"current_url": current_url, "status": "fetching"}
                    for link in allLinks:
                        authors = amsscrapper.get_authors(link)
                        for index, auth in enumerate(authors, start=1):
                            sht.save(auth)
                            if usegooglesheet:
                                shet.save(auth)
                            yield {"author": auth, "index": index}
                    if not filen:
                        break
                    if usegooglesheet:
                        shet.save_to_file()
                    sht.save_to_file()
                if stop_work:
                    break
            if owner:
                if usegooglesheet:
                    shet.transfer_ownership(mail)
            if usegooglesheet:
                yield {"final_output": sht.save_to_file(), "link" : shet.save_to_file()}
            else:
                yield {"final_output": sht.save_to_file(), "link" : ""}
        
        elif domain == "www.degruyter.com" or domain == "degruyter.com":
            for ur in links:
                isu = 1
                while True:
                    if stop_work:
                        break
                    try:
                        if filen:
                            current_url = ur.format(i=isu)
                            allLinks = degruyterscrapper.getLinks(current_url)
                            isu += 1
                        else:
                            current_url = ur
                            allLinks = degruyterscrapper.getLinks(current_url)
                    except Exception as e:
                        break
                    yield {"current_url": current_url, "status": "fetching"}
                    for link in allLinks:
                        authors = degruyterscrapper.get_author_details(link)
                        for index, auth in enumerate(authors, start=1):
                            sht.save(auth)
                            if usegooglesheet:
                                shet.save(auth)
                            yield {"author": auth, "index": index}
                    if not filen:
                        break
                    if usegooglesheet:
                        shet.save_to_file()
                    sht.save_to_file()
                if stop_work:
                    break
            if owner:
                if usegooglesheet:
                    shet.transfer_ownership(mail)
            if usegooglesheet:
                yield {"final_output": sht.save_to_file(), "link" : shet.save_to_file()}
            else:
                yield {"final_output": sht.save_to_file(), "link" : ""}
        
        elif domain == "www.aimspress.com" or domain == "aimspress.com":
            for ur in links:
                isu = 1
                while True:
                    if stop_work:
                        break
                    try:
                        if filen:
                            current_url = ur.format(i=isu)
                            allLinks = aiimsscrapper.get_links(current_url)
                            isu += 1
                        else:
                            current_url = ur
                            allLinks = aiimsscrapper.get_links(current_url)
                    except Exception as e:
                        break
                    yield {"current_url": current_url, "status": "fetching"}
                    for link in allLinks:
                        authors = aiimsscrapper.get_author_details(link)
                        for index, auth in enumerate(authors, start=1):
                            sht.save(auth)
                            if usegooglesheet:
                                shet.save(auth)
                            yield {"author": auth, "index": index}
                    if not filen:
                        break
                    if usegooglesheet:
                        shet.save_to_file()
                    sht.save_to_file()
                if stop_work:
                    break
            if owner:
                if usegooglesheet:
                    shet.transfer_ownership(mail)
            if usegooglesheet:
                yield {"final_output": sht.save_to_file(), "link" : shet.save_to_file()}
            else:
                yield {"final_output": sht.save_to_file(), "link" : ""}
        
        elif domain == "link.springer.com":
            for ur in links:
                isu = 1
                while True:
                    if stop_work:
                        break
                    try:
                        if filen:
                            current_url = ur.format(i=isu)
                            allLinks = springerscrapper.get_all_articals_link(current_url)
                            isu += 1
                        else:
                            current_url = ur
                            allLinks = springerscrapper.get_all_articals_link(current_url)
                    except Exception as e:
                        break
                    yield {"current_url": current_url, "status": "fetching"}
                    for link in allLinks:
                        authors = springerscrapper.get_authors(link)
                        for index, auth in enumerate(authors, start=1):
                            sht.save(auth)
                            if usegooglesheet:
                                shet.save(auth)
                            yield {"author": auth, "index": index}
                    if not filen:
                        break
                    if usegooglesheet:
                        shet.save_to_file()
                    sht.save_to_file()
                if stop_work:
                    break
            if owner:
                if usegooglesheet:
                    shet.transfer_ownership(mail)
            if usegooglesheet:
                yield {"final_output": sht.save_to_file(), "link" : shet.save_to_file()}
            else:
                yield {"final_output": sht.save_to_file(), "link" : ""}
        
        elif domain == "www.sciencedirect.com":
            for ur in links:
                isu = 1
                while True:
                    if stop_work:
                        break
                    try:
                        if filen:
                            current_url = ur.format(i=isu)
                            authors, _ = sciencedirect.run(current_url)
                            isu += 1
                        else:
                            current_url = ur
                            authors, _ = sciencedirect.run(current_url)
                    except Exception as e:
                        break
                    yield {"current_url": current_url, "status": "fetching"}
                    for index, auth in enumerate(authors, start=1):
                        sht.save(auth)
                        if usegooglesheet:
                            shet.save(auth)
                        yield {"author": auth, "index": index}
                    if not filen:
                        break
                    sht.save_to_file()
                    if usegooglesheet:
                        shet.save_to_file()
                if stop_work:
                    break
            if owner:
                if usegooglesheet:
                    shet.transfer_ownership(mail)
            if usegooglesheet:
                yield {"final_output": sht.save_to_file(), "link" : shet.save_to_file()}
            else:
                yield {"final_output": sht.save_to_file(), "link" : ""}
        
        elif domain == "www.sciencedirect.com" and "acta-mathematica-scientia" in url:
            for ur in links:
                isu = 1
                while True:
                    if stop_work:
                        break
                    try:
                        if filen:
                            current_url = ur.format(i=isu)
                            authors, _ = sciencedirect_admaths.run(current_url)
                            isu += 1
                        else:
                            current_url = ur
                            authors, _ = sciencedirect_admaths.run(current_url)
                    except Exception as e:
                        break
                    yield {"current_url": current_url, "status": "fetching"}
                    for index, auth in enumerate(authors, start=1):
                        sht.save(auth)
                        if usegooglesheet:
                            shet.save(auth)
                        yield {"author": auth, "index": index}
                    if not filen:
                        break
                    if usegooglesheet:
                        shet.save_to_file()
                    sht.save_to_file()
                if stop_work:
                    break
            if owner:
                if usegooglesheet:
                    shet.transfer_ownership(mail)
            if usegooglesheet:
                yield {"final_output": sht.save_to_file(), "link" : shet.save_to_file()}
            else:
                yield {"final_output": sht.save_to_file(), "link" : ""}

        else:
            raise gr.Error("Invalid URL. Contact @H4CK3R_5M4CK3R on Telegram")
    except gr.Error:
        pass
    except Exception as e:
        raise gr.Error("An error occurred. Check your URL or contact @H4CK3R_5M4CK3R on Telegram")

def handle_url(url, From_volume: int, To_Volume: int, Gmail: str, FolderId: str, Output: str, MakeOwner:bool=True, UseGoogleSheet:bool=True, Reverse: bool=False):
    authors = []
    details = []
    final_output = None
    link = None
    credit = crypto.decrypt(os.getenv("KEY"), os.getenv("GOOGLE_AUTH_CREDENTIALS"))
    credit = json.loads(credit)

    for key, result in enumerate(filterUrlandRun(url, From_volume, To_Volume, Reverse, Output, MakeOwner, Gmail, FolderId, credit=credit, usegooglesheet=UseGoogleSheet)):
        if "final_output" in result:
            final_output = result["final_output"]
            link = result["link"]
        else:
            if "author" in result:
                author = result["author"]
                authors.append(f"Saving Author: {author.get('Name')}\n")
            if "current_url" in result:
                current_url = result["current_url"]
                details.append(f"Scraping: {current_url}\n")
        authors = authors[-3:] if len(authors) > 3 else authors
        details = details[-3:] if len(details) > 3 else details
        yield "\n".join(authors), "\n".join(details), final_output if final_output else None, gr.Audio("notification.mp3", autoplay=True) if final_output else None, link

interface = gr.Interface(
    fn=handle_url,
    inputs=["textbox", gr.Number(0, label="From Volume"), gr.Number(0, label="To Volume"), gr.Textbox(label="Access Gmail (Check Docs)"), gr.Textbox(label="Google Folder ID (Check Docs)"), gr.Textbox(label="Output File Name"), gr.Checkbox(True, label="Make Owner"), gr.Checkbox(True, label="Use Google Sheets"), "checkbox"],
    outputs=[gr.Markdown("LOGS", height="250px", elem_id="authorscroll"), gr.Markdown("", height="100px", elem_id="authorscroll"), "file", "audio", "textbox"],
    title="Web Scraper",
    description=description,
    examples=exmpl,
    cache_examples=False
)

interface.launch(
    share=False,
    show_api=False,
    auth=auth
    )