Spaces:
Sleeping
Sleeping
import io
import re
import subprocess
import tempfile

import gradio as gr
def run_katana(url, crawl_type):
    """Crawl ``url`` with the ``katana`` CLI and return its output.

    Args:
        url: Seed URL to crawl. Surrounding whitespace is ignored.
        crawl_type: ``"All URLs"`` runs katana with only ``-u``. Any other
            value additionally passes a crawl-scope regex (``-cs``) anchored
            at ``url``, plus ``-depth 5`` and ``-jc``.

    Returns:
        A ``(text, buffer)`` tuple. On success ``text`` is katana's stdout and
        ``buffer`` is an ``io.StringIO`` holding the same text. On failure
        ``text`` is an error message and ``buffer`` is ``None``; this function
        does not raise for subprocess failures.
    """
    # Reject missing/blank input up front instead of spawning a pointless process.
    if not isinstance(url, str) or not url.strip():
        return "Please enter a URL to crawl.", None
    url = url.strip()

    if crawl_type == "All URLs":
        command = ["katana", "-u", url]
    else:  # Subkeyword URLs
        command = [
            "katana",
            "-u", url,
            # -cs is a regular expression: escape the URL so characters such
            # as "?", "+" or "(" are matched literally rather than as regex
            # operators.
            "-cs", f"^{re.escape(url)}.*",
            "-depth", "5",
            "-jc",
        ]

    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as exc:
        # Non-zero exit: append stderr so the user sees why katana failed.
        details = (exc.stderr or "").strip()
        return (f"{exc}\n{details}" if details else str(exc)), None
    except (OSError, ValueError, subprocess.SubprocessError) as exc:
        # OSError: katana missing or not executable; ValueError: invalid
        # argument (e.g. embedded NUL) or undecodable output.
        return str(exc), None

    # Callers receive both the raw text and an in-memory file-like copy.
    return result.stdout, io.StringIO(result.stdout)
# Gradio callback: run the crawl and expose its output as a downloadable file.
def process_and_display(url, crawl_type):
    """Run a crawl and prepare its output for display and download.

    Args:
        url: Seed URL forwarded to ``run_katana``.
        crawl_type: Crawl mode forwarded to ``run_katana``.

    Returns:
        A ``(text, path)`` tuple. ``text`` is whatever ``run_katana`` returned
        as its first element (crawl output or an error message). ``path`` is
        the name of a ``.txt`` temporary file containing the crawl output, or
        ``None`` when ``run_katana`` produced no buffer.
    """
    result, file_data = run_katana(url, crawl_type)
    if file_data is None:
        return result, None

    # delete=False keeps the file on disk after it is closed, so the returned
    # path is still valid for the caller. The context manager guarantees the
    # handle is closed even if the write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.txt') as temp_file:
        temp_file.write(file_data.getvalue().encode('utf-8'))
    return result, temp_file.name
# Gradio interface: a URL textbox plus a dropdown that selects the crawl type.
# Input widgets, listed in the order process_and_display takes its arguments.
crawl_inputs = [
    gr.Textbox(label="Enter URL"),
    gr.Dropdown(choices=["All URLs", "Subkeyword URLs"], label="Crawl Type"),
]

# Output widgets, matching the (text, file path) pair the callback returns.
crawl_outputs = [
    gr.Textbox(label="Crawl Results"),
    gr.File(label="Download Results"),
]

iface = gr.Interface(
    fn=process_and_display,
    inputs=crawl_inputs,
    outputs=crawl_outputs,
    title="Katana Crawler",
    description="Enter a URL to crawl using Katana. Select the crawl type and results will be displayed and available for download.",
    allow_flagging="never",
)

# Bind to all network interfaces on port 7860.
iface.launch(server_name="0.0.0.0", server_port=7860)