import gradio as gr
import aiimsscrapper
import amsscrapper
import degruyterscrapper
import sciencedirect
import sciencedirect_admaths
import springerscrapper
from urllib.parse import urlparse
from sheets import ExcelAutomator
from sgoogle import GoogleSheetAutomator
import os
import random
import string
import json
import crypto

# Login credentials come from environment variables (USERNAME1-3 / PASSWORD1-3);
# FOLDER_ID, GMAIL, KEY, and GOOGLE_AUTH_CREDENTIALS are read further below.
auth = [
    (os.getenv("USERNAME1"), os.getenv("PASSWORD1")),
    (os.getenv("USERNAME2"), os.getenv("PASSWORD2")),
    (os.getenv("USERNAME3"), os.getenv("PASSWORD3")),
]

description = """
For bug reports or improvements, contact [@H4CK3R_5M4CK3R](https://t.me/H4CK3R_5M4CK3R) on Telegram.

**Usage Instructions:**

1. **Single Issue Scraping:**
   - Provide the issue link in the URL section.
   - Optionally, specify the desired output file name.

2. **Multiple Issues Scraping:**
   - Use curly braces `{}` in the URL to mark where the volume (`v`) and issue (`i`) numbers should be inserted.
   - Define the range for volumes, not issues, and make sure you pass the volume range correctly.
   - To pass the range, wrap it in parentheses at the start of the URL, like `(from:to)-link/{v}/{i}`; check out the examples below.
   - You can also pass multiple links (one per line) and add a range to each of them in the same way.

3. **Read this before using the Google Sheets feature:**
   - **IMPORTANT:** First create a Google Drive folder, then grant access to both `sheettesting@testing-430816.iam.gserviceaccount.com` and `primearchiveofficial@gmail.com`.
   - Next, make sure to check **Make Owner**: it transfers full control of the file to you, so you can delete it or do anything else you like with it.
   - You can watch the data being added live by opening the same folder and checking your output file.
   - You will get the file link in output 4. You can open it too, but only the email addresses given above will have access.
   - Even after the file is created, do not remove the access from the Google Drive folder, as more files may be added there later.
   - To get the Google Drive folder ID: go to drive.google.com -> create a new folder -> click on the ⋮ menu -> Share -> enter both email addresses given above -> make sure to grant Editor permission -> click Send.
   - Then click "Copy link". The link will look like `https://drive.google.com/drive/folders/folderid?usp=sharing`; here **folderid** is the folder ID.
   - **IMPORTANT:** After everything is done, accept the ownership by clicking ⋮ -> Share -> Accept ownership. Congratulations, you are now the owner of the file and can do with it as you like.

**Note:**

- The range should be the volume range, not the issue range.
- Some authors may not have a listed address; their profile link will be included in the address section instead.
- After progress is completed, make sure to click Clear, because sometimes the notification does not ring.

If you are copying this Space, make sure to contact the owner mentioned above.
"""
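# A minimal illustration of the range convention described above (the actual
# expansion happens inside filterUrlandRun below): a ranged URL such as
#
#   "(22:23)-https://www.degruyter.com/journal/key/fca/{v}/{i}/html"
#
# is split into one template per volume, with the issue placeholder kept:
#
#   "https://www.degruyter.com/journal/key/fca/22/{i}/html"
#   "https://www.degruyter.com/journal/key/fca/23/{i}/html"
#
# The issue number {i} then starts at 1 and is incremented until the target
# site stops returning a valid page.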
exmpl = [
    ["https://www.ams.org/journals/jams/2024-37-01/home.html?active=allissues", "example@gmail.com", "asdfasdfasdfasdfasdf", "example1", True, True, False],
    ["https://www.degruyter.com/journal/key/fca/20/2/html", "example@gmail.com", "asdfasdfasdfasdfasdf", "example2", True, True, False],
    ["(22:23)-https://www.degruyter.com/journal/key/fca/{v}/{i}/html", "example@gmail.com", "asdfasdfasdfasdfasdf", "example3", True, True, False],
    ["https://www.aimspress.com/math/article/2024/8/archive-articles", "example@gmail.com", "asdfasdfasdfasdfasdf", "example4", True, True, False],
    ["(2021:2022)-https://www.aimspress.com/math/article/{v}/{i}/archive-articles", "example@gmail.com", "asdfasdfasdfasdfasdf", "example5", True, True, False],
    ["https://link.springer.com/journal/208/volumes-and-issues/388-3", "example@gmail.com", "asdfasdfasdfasdfasdf", "example6", True, True, False],
    ["(388:389)-https://link.springer.com/journal/208/volumes-and-issues/{v}-{i}", "example@gmail.com", "asdfasdfasdfasdfasdf", "example7", True, True, False],
    ["https://www.sciencedirect.com/journal/advances-in-applied-mathematics/vol/158/suppl/C", "example@gmail.com", "asdfasdfasdfasdfasdf", "example8", True, True, False],
    ["https://www.sciencedirect.com/journal/acta-mathematica-scientia/vol/38/issue/6", "example@gmail.com", "asdfasdfasdfasdfasdf", "example9", True, True, False],
    ["(37:38)-https://www.sciencedirect.com/journal/acta-mathematica-scientia/vol/{v}/issue/{i}", "example@gmail.com", "asdfasdfasdfasdfasdf", "example10", True, True, False],
    ["(37:38)-https://www.sciencedirect.com/journal/acta-mathematica-scientia/vol/{v}/issue/{i}\n(388:389)-https://link.springer.com/journal/208/volumes-and-issues/{v}-{i}\n(2021:2022)-https://www.aimspress.com/math/article/{v}/{i}/archive-articles\n(22:23)-https://www.degruyter.com/journal/key/fca/{v}/{i}/html", "example@gmail.com", "asdfasdfasdfasdfasdf", "example11", True, True, False],
]

stop_work = False

def generate_random_filename(length=8):
    random_string = ''.join(random.choices(string.ascii_letters + string.digits, k=length))
    return random_string
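# Each scraper module follows the same informal contract (inferred from the
# call sites below, since the modules themselves are not shown here): one
# function returns the article links found on an issue page, and one returns
# an article's authors as dicts keyed "Name", "Address", and "Email" — the
# same headers used for the Excel and Google Sheet outputs. For example:
#
#   allLinks = degruyterscrapper.getLinks(issue_url)
#   authors = degruyterscrapper.get_author_details(allLinks[0])
#   # -> [{"Name": "...", "Address": "...", "Email": "..."}, ...]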
def filterUrlandRun(url: str, from_range: int, to_range: int, reverse: bool, output: str, owner: bool, mail: str, folder_id: str, credit: dict, usegooglesheet: bool, multiUrls: bool):
    # Generator: yields progress dicts ({"current_url": ...} / {"author": ...})
    # while scraping, and a final {"final_output": ..., "link": ...} at the end.
    if len(output.strip()) < 1:
        output = generate_random_filename()
    if os.path.exists(f"{output}.xlsx"):
        os.remove(f"{output}.xlsx")
    url_sch = urlparse(url)
    domain = url_sch.hostname
    if usegooglesheet:
        shet = GoogleSheetAutomator(
            ["Name", "Address", "Email"],
            folder_id,
            outputfile=output,
            creds_dict=credit,
        )
    sht = ExcelAutomator(["Name", "Address", "Email"], output)
    filen = True
    if "{" in url:
        # Ranged URL: build one template per volume, leaving {i} to be filled
        # with successive issue numbers below.
        links = []
        if reverse:
            for vol in reversed(range(from_range, to_range + 1)):
                links.append(url.format(v=vol, i="{i}"))
        else:
            for vol in range(from_range, to_range + 1):
                links.append(url.format(v=vol, i="{i}"))
    else:
        links = [url]
        filen = False
    try:
        if domain == "www.ams.org" or domain == "ams.org":
            for ur in links:
                isu = 1
                while True:
                    if stop_work:
                        break
                    try:
                        if filen:
                            # AMS zero-pads single-digit issue numbers in its URLs.
                            current_url = ur.format(i=(str(isu) if len(str(isu)) > 1 else f"0{isu}"))
                            allLinks = amsscrapper.getlinks(current_url)
                            isu += 1
                        else:
                            current_url = ur
                            allLinks = amsscrapper.getlinks(current_url)
                    except Exception as e:
                        break
                    yield {"current_url": current_url, "status": "fetching"}
                    for link in allLinks:
                        authors = amsscrapper.get_authors(link)
                        for index, auth in enumerate(authors, start=1):
                            sht.save(auth)
                            if usegooglesheet:
                                shet.save(auth)
                            yield {"author": auth, "index": index}
                    if not filen:
                        break
                if usegooglesheet:
                    shet.save_to_file()
                sht.save_to_file()
                if stop_work:
                    break
            if owner:
                if usegooglesheet:
                    shet.transfer_ownership(mail)
            if usegooglesheet:
                yield {"final_output": sht.save_to_file(), "link": shet.save_to_file()}
            else:
                yield {"final_output": sht.save_to_file(), "link": ""}
        elif domain == "www.degruyter.com" or domain == "degruyter.com":
            for ur in links:
                isu = 1
                while True:
                    if stop_work:
                        break
                    try:
                        if filen:
                            current_url = ur.format(i=isu)
                            allLinks = degruyterscrapper.getLinks(current_url)
                            isu += 1
                        else:
                            current_url = ur
                            allLinks = degruyterscrapper.getLinks(current_url)
                    except Exception as e:
                        break
                    yield {"current_url": current_url, "status": "fetching"}
                    for link in allLinks:
                        authors = degruyterscrapper.get_author_details(link)
                        for index, auth in enumerate(authors, start=1):
                            sht.save(auth)
                            if usegooglesheet:
                                shet.save(auth)
                            yield {"author": auth, "index": index}
                    if not filen:
                        break
                if usegooglesheet:
                    shet.save_to_file()
                sht.save_to_file()
                if stop_work:
                    break
            if owner:
                if usegooglesheet:
                    shet.transfer_ownership(mail)
            if usegooglesheet:
                yield {"final_output": sht.save_to_file(), "link": shet.save_to_file()}
            else:
                yield {"final_output": sht.save_to_file(), "link": ""}
        elif domain == "www.aimspress.com" or domain == "aimspress.com":
            for ur in links:
                isu = 1
                while True:
                    if stop_work:
                        break
                    try:
                        if filen:
                            current_url = ur.format(i=isu)
                            allLinks = aiimsscrapper.get_links(current_url)
                            isu += 1
                        else:
                            current_url = ur
                            allLinks = aiimsscrapper.get_links(current_url)
                    except Exception as e:
                        break
                    yield {"current_url": current_url, "status": "fetching"}
                    for link in allLinks:
                        authors = aiimsscrapper.get_author_details(link)
                        for index, auth in enumerate(authors, start=1):
                            sht.save(auth)
                            if usegooglesheet:
                                shet.save(auth)
                            yield {"author": auth, "index": index}
                    if not filen:
                        break
                if usegooglesheet:
                    shet.save_to_file()
                sht.save_to_file()
                if stop_work:
                    break
            if owner:
                if usegooglesheet:
                    shet.transfer_ownership(mail)
            if usegooglesheet:
                yield {"final_output": sht.save_to_file(), "link": shet.save_to_file()}
            else:
                yield {"final_output": sht.save_to_file(), "link": ""}
        elif domain == "link.springer.com":
            for ur in links:
                isu = 1
                while True:
                    if stop_work:
                        break
                    try:
                        if filen:
                            current_url = ur.format(i=isu)
                            allLinks = springerscrapper.get_all_articals_link(current_url)
                            isu += 1
                        else:
                            current_url = ur
                            allLinks = springerscrapper.get_all_articals_link(current_url)
                    except Exception as e:
                        break
                    yield {"current_url": current_url, "status": "fetching"}
                    for link in allLinks:
                        authors = springerscrapper.get_authors(link)
                        for index, auth in enumerate(authors, start=1):
                            sht.save(auth)
                            if usegooglesheet:
                                shet.save(auth)
                            yield {"author": auth, "index": index}
                    if not filen:
                        break
                if usegooglesheet:
                    shet.save_to_file()
                sht.save_to_file()
                if stop_work:
                    break
            if owner:
                if usegooglesheet:
                    shet.transfer_ownership(mail)
            if usegooglesheet:
                yield {"final_output": sht.save_to_file(), "link": shet.save_to_file()}
            else:
                yield {"final_output": sht.save_to_file(), "link": ""}
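        # ScienceDirect gives no clean failure for out-of-range issue numbers
        # (presumably it serves a repeated page instead), so the two branches
        # below remember the previous page title and stop the issue loop as
        # soon as the same title comes back twice.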
        elif domain == "www.sciencedirect.com" and "acta-mathematica-scientia" in url:
            # This journal-specific check must come before the generic
            # www.sciencedirect.com branch, otherwise it can never match.
            oldtitle = ""
            for ur in links:
                isu = 1
                while True:
                    if stop_work:
                        break
                    try:
                        if filen:
                            current_url = ur.format(i=isu)
                            authors, title = sciencedirect_admaths.run(current_url)
                            if title == oldtitle:
                                break
                            else:
                                oldtitle = title
                            isu += 1
                        else:
                            current_url = ur
                            authors, title = sciencedirect_admaths.run(current_url)
                            if title == oldtitle:
                                break
                            else:
                                oldtitle = title
                    except Exception as e:
                        break
                    yield {"current_url": current_url, "status": "fetching"}
                    for index, auth in enumerate(authors, start=1):
                        sht.save(auth)
                        if usegooglesheet:
                            shet.save(auth)
                        yield {"author": auth, "index": index}
                    if not filen:
                        break
                if usegooglesheet:
                    shet.save_to_file()
                sht.save_to_file()
                if stop_work:
                    break
            if owner:
                if usegooglesheet:
                    shet.transfer_ownership(mail)
            if usegooglesheet:
                yield {"final_output": sht.save_to_file(), "link": shet.save_to_file()}
            else:
                yield {"final_output": sht.save_to_file(), "link": ""}
        elif domain == "www.sciencedirect.com":
            oldtitle = ""
            for ur in links:
                isu = 1
                while True:
                    if stop_work:
                        break
                    try:
                        if filen:
                            current_url = ur.format(i=isu)
                            authors, title = sciencedirect.run(current_url)
                            if title == oldtitle:
                                break
                            else:
                                oldtitle = title
                            isu += 1
                        else:
                            current_url = ur
                            authors, title = sciencedirect.run(current_url)
                            if title == oldtitle:
                                break
                            else:
                                oldtitle = title
                    except Exception as e:
                        break
                    yield {"current_url": current_url, "status": "fetching"}
                    for index, auth in enumerate(authors, start=1):
                        sht.save(auth)
                        if usegooglesheet:
                            shet.save(auth)
                        yield {"author": auth, "index": index}
                    if not filen:
                        break
                sht.save_to_file()
                if usegooglesheet:
                    shet.save_to_file()
                if stop_work:
                    break
            if owner:
                if usegooglesheet:
                    shet.transfer_ownership(mail)
            if usegooglesheet:
                yield {"final_output": sht.save_to_file(), "link": shet.save_to_file()}
            else:
                yield {"final_output": sht.save_to_file(), "link": ""}
        else:
            raise gr.Error("Invalid URL. Contact @H4CK3R_5M4CK3R on Telegram")
    except gr.Error:
        pass
    except Exception as e:
        raise gr.Error("An error occurred. Check your URL or contact @H4CK3R_5M4CK3R on Telegram")

def handle_url(Link: str, Gmail: str, FolderId: str, Output: str, MakeOwner: bool = True, UseGoogleSheet: bool = True, Reverse: bool = False):
    if len(FolderId) < 2:
        FolderId = os.getenv("FOLDER_ID")
    if len(Gmail) < 2:
        Gmail = os.getenv("GMAIL")
    authors = []
    details = []
    final_output = None
    multi_output = None
    link = None
    From_volume = 0
    To_Volume = 0
    urls = []
    if "\n" in Link:
        urls = Link.split("\n")
        multiUrls = True
    else:
        urls.append(Link)
        multiUrls = False
    print(f"URLS : {urls}")
    for url in urls:
        if url.startswith("("):
            # Parse the "(from:to)-" range prefix, then strip it from the URL.
            From_volume = int(url.split("(", 1)[1].split(":")[0])
            To_Volume = int(url.split("(", 1)[1].split(":")[1].split(")", 1)[0])
            url = url.split("-", 1)[1].strip()
        credit = None  # Stays None if the credentials cannot be decrypted.
        try:
            credit = crypto.decrypt(os.getenv("KEY"), os.getenv("GOOGLE_AUTH_CREDENTIALS"))
            credit = json.loads(credit)
        except Exception:
            pass
        for result in filterUrlandRun(url, From_volume, To_Volume, Reverse, Output, MakeOwner, Gmail, FolderId, credit=credit, usegooglesheet=UseGoogleSheet, multiUrls=multiUrls):
            if "final_output" in result:
                if not multiUrls:
                    final_output = result["final_output"]
                else:
                    multi_output = result["final_output"]
                link = result["link"]
            else:
                if "author" in result:
                    author = result["author"]
                    authors.append(f"Saving Author: {author.get('Name')}\n")
                if "current_url" in result:
                    current_url = result["current_url"]
                    details.append(f"Scraping: {current_url}\n")
                # Keep only the last three log lines of each kind.
                authors = authors[-3:] if len(authors) > 3 else authors
                details = details[-3:] if len(details) > 3 else details
            if multiUrls:
                final_output = None
            yield (
                "\n".join(authors),
                "\n".join(details),
                final_output if final_output else None,
                gr.Audio("notification.mp3", autoplay=True) if final_output else None,
                link,
            )
    if multiUrls:
        # With multiple URLs, the combined file is delivered once, at the very end.
        yield "\n".join(authors), "\n".join(details), multi_output, gr.Audio("notification.mp3", autoplay=True), link
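# The five domain branches in filterUrlandRun share the same loop shape and
# differ only in which module fetches links and authors. A hypothetical
# refactor (a sketch only — not wired in; `get_links` and `get_authors` stand
# for each module's own functions) could collapse them:
#
# def scrape_issue(get_links, get_authors, issue_url, sht, shet=None):
#     for article in get_links(issue_url):
#         for auth in get_authors(article):
#             sht.save(auth)
#             if shet is not None:
#                 shet.save(auth)
#             yield auth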
interface = gr.Interface(
    fn=handle_url,
    inputs=[
        gr.TextArea(label="URL / URLs", placeholder="Enter the URL (or multiple URLs, one per line) to scrape"),
        gr.Textbox(label="Access Gmail (Check Docs)"),
        gr.Textbox(label="Google Folder ID (Check Docs)"),
        gr.Textbox(label="Output File Name"),
        gr.Checkbox(True, label="Make Owner"),
        gr.Checkbox(True, label="Use Google Sheets"),
        "checkbox",  # maps to the Reverse parameter of handle_url
    ],
    outputs=[
        gr.Markdown("LOGS", height="250px", elem_id="authorscroll"),
        gr.Markdown("", height="100px", elem_id="authorscroll"),
        "file",
        "audio",
        "textbox",
    ],
    title="Web Scraper",
    description=description,
    examples=exmpl,
    cache_examples=False,
)

interface.launch(
    share=False,
    show_api=False,
    auth=auth,
)