|
import logging
import re
import threading
import time
from dataclasses import dataclass
from datetime import datetime
from functools import lru_cache
from typing import Dict, Any, Optional, List
from urllib.parse import urlparse, urljoin

import gradio as gr
import requests
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse, RedirectResponse, JSONResponse
|
|
|
|
|
# Configure the root logger at import time: INFO and above, each record
# rendered as "<timestamp> - <level name> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
|
|
|
@dataclass
class ProxyResponse:
    """Result of a single upstream fetch made by the proxy.

    Attributes:
        status: HTTP status code to report for this fetch.
        content: Response payload; its concrete type is chosen by the
            producer of the instance.
        headers: Response headers as a plain mapping.
        redirect_url: Target to redirect the client to, or ``None`` when
            no redirect is involved.
        error: Human-readable error message, or ``None`` on success.
    """

    status: int
    content: Any
    headers: Dict[str, str]
    redirect_url: Optional[str] = None
    error: Optional[str] = None
|
|
|
class Config:
    """Static settings for the proxy, grouped as class attributes."""

    # Asset base URL -- presumably a landing/static page; TODO confirm usage.
    ASSET_URL = "https://1pages.nbid.bid/"
    # Prefix put in front of redirect targets that are themselves GitHub URLs.
    PREFIX = "/"
    # Truthy: redirect blob URLs to cdn.jsdelivr.net instead of fetching them.
    JSDELIVR = 0
    # Cache lifetime -- presumably seconds; TODO confirm.
    CACHE_TTL = 3600
    MAX_RETRIES = 3
    # Passed as the `timeout` argument of upstream HTTP requests.
    TIMEOUT = 10
    # Per-client limit: at most "max" requests per "window_ms" (15 minutes).
    RATE_LIMIT: Dict[str, int] = {"window_ms": 900_000, "max": 100}
    # Substrings a URL must contain to be served; empty list allows everything.
    WHITE_LIST: List[str] = []

    # Headers installed on the outgoing HTTP session.
    DEFAULT_HEADERS: Dict[str, str] = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/91.0.4472.124 Safari/537.36"
        ),
    }

    # CORS policy settings.
    CORS: Dict[str, Any] = dict(
        allow_origins=["*"],
        allow_methods=["GET", "POST", "OPTIONS"],
        allow_headers=["*"],
        max_age=1728000,
    )

    # Regular expressions describing the GitHub URL families that are proxied.
    PATTERNS: Dict[str, str] = dict(
        releases=r"^(?:https?:\/\/)?github\.com\/.+?\/.+?\/(?:releases|archive)\/.*$",
        blob=r"^(?:https?:\/\/)?github\.com\/.+?\/.+?\/(?:blob|raw)\/.*$",
        git=r"^(?:https?:\/\/)?github\.com\/.+?\/.+?\/(?:info|git-).*$",
        raw=r"^(?:https?:\/\/)?raw\.(?:githubusercontent|github)\.com\/.+?\/.+?\/.+?\/.+$",
        gist=r"^(?:https?:\/\/)?gist\.(?:githubusercontent|github)\.com\/.+?\/.+?\/.+$",
        tags=r"^(?:https?:\/\/)?github\.com\/.+?\/.+?\/tags.*$",
    )
|
|
|
class RateLimiter:
    """Sliding-window request limiter keyed by client IP.

    Each IP may make at most ``max_requests`` requests within any
    ``window_ms`` millisecond window.

    Args:
        window_ms: Window length in milliseconds. ``None`` (the default)
            reads ``Config.RATE_LIMIT["window_ms"]`` on every call.
        max_requests: Requests allowed per window. ``None`` (the default)
            reads ``Config.RATE_LIMIT["max"]`` on every call.
    """

    def __init__(self, window_ms: Optional[float] = None, max_requests: Optional[int] = None):
        self._window_ms = window_ms
        self._max_requests = max_requests
        # ip -> timestamps (ms since the epoch, ascending) of accepted requests.
        self.request_records: Dict[str, List[float]] = {}
        # Time (ms) of the last purge of idle IPs; 0 forces one on first use.
        self._last_sweep = 0.0

    @property
    def window_ms(self) -> float:
        """Effective window length in milliseconds."""
        if self._window_ms is None:
            return Config.RATE_LIMIT["window_ms"]
        return self._window_ms

    @property
    def max_requests(self) -> int:
        """Effective number of requests allowed per window."""
        if self._max_requests is None:
            return Config.RATE_LIMIT["max"]
        return self._max_requests

    def _recent(self, ip: str, now: float) -> List[float]:
        """Return a new list with the timestamps of ``ip`` still inside the window."""
        cutoff = now - self.window_ms
        return [stamp for stamp in self.request_records.get(ip, ()) if stamp > cutoff]

    def _sweep(self, now: float) -> None:
        """Forget IPs whose newest request has left the window."""
        cutoff = now - self.window_ms
        # Iterate over a snapshot so the dict can be modified safely.
        for ip, stamps in list(self.request_records.items()):
            if not stamps or stamps[-1] <= cutoff:
                self.request_records.pop(ip, None)
        self._last_sweep = now

    def is_allowed(self, ip: str) -> bool:
        """Record a request from ``ip`` and report whether it is within the limit.

        Returns:
            ``True`` if the request was accepted (and counted), ``False`` if
            the IP has already used up its quota for the current window.
        """
        now = time.time() * 1000

        # At most once per window, drop IPs that stopped sending requests so
        # the table does not grow without bound.
        if now - self._last_sweep >= self.window_ms:
            self._sweep(now)

        recent = self._recent(ip, now)
        allowed = len(recent) < self.max_requests
        if allowed:
            recent.append(now)
        self.request_records[ip] = recent
        return allowed

    def get_remaining(self, ip: str) -> int:
        """Return how many more requests ``ip`` may make in the current window.

        Expired timestamps are ignored, so the value reflects the quota at
        the time of the call rather than at the time of the last request.
        """
        used = len(self._recent(ip, time.time() * 1000))
        return max(0, self.max_requests - used)
|
|
|
class GitHubProxy:
    """Core proxy: validates GitHub URLs, rate-limits clients and fetches content.

    Successful, non-streaming GET/HEAD responses are cached in memory for
    ``Config.CACHE_TTL`` (interpreted as seconds). Streaming responses,
    redirects and failures are never cached: a streamed body can only be
    read once, redirect targets may be short-lived (assumption, not verified
    here), and caching errors would make transient failures permanent.
    """

    # Maximum number of responses kept in the in-memory cache.
    _CACHE_MAX_ENTRIES = 1000
    # Bodies longer than this (bytes or characters) are served but not cached.
    _CACHE_MAX_BODY = 1024 * 1024

    def __init__(self):
        self.rate_limiter = RateLimiter()
        self.session = requests.Session()
        self.session.headers.update(Config.DEFAULT_HEADERS)
        # (METHOD, url) -> (expiry on the time.monotonic() clock, ProxyResponse)
        self._cache: Dict[Any, Any] = {}
        self._cache_lock = threading.Lock()

    def check_url(self, url: str) -> bool:
        """Return True if ``url`` matches one of the supported GitHub patterns."""
        return any(re.search(pattern, url, re.I) for pattern in Config.PATTERNS.values())

    def check_white_list(self, url: str) -> bool:
        """Return True if ``url`` passes the white list (an empty list allows all)."""
        if not Config.WHITE_LIST:
            return True
        return any(white_item in url for white_item in Config.WHITE_LIST)

    def _cache_get(self, key: Any) -> Optional[ProxyResponse]:
        """Return the live cached response for ``key``, dropping it if expired."""
        now = time.monotonic()
        with self._cache_lock:
            entry = self._cache.get(key)
            if entry is None:
                return None
            expires_at, cached = entry
            if expires_at <= now:
                del self._cache[key]
                return None
            return cached

    def _cache_put(self, key: Any, response: ProxyResponse) -> None:
        """Store ``response`` under ``key``, evicting entries when the cache is full."""
        now = time.monotonic()
        with self._cache_lock:
            if key not in self._cache and len(self._cache) >= self._CACHE_MAX_ENTRIES:
                # Prefer dropping expired entries; fall back to the oldest insert.
                expired = [k for k, (expires_at, _) in self._cache.items() if expires_at <= now]
                for stale_key in expired:
                    del self._cache[stale_key]
                if len(self._cache) >= self._CACHE_MAX_ENTRIES:
                    del self._cache[next(iter(self._cache))]
            self._cache[key] = (now + Config.CACHE_TTL, response)

    def _is_cacheable(self, response: ProxyResponse) -> bool:
        """Return True for successful, non-redirect responses with a small body."""
        if response.error is not None or response.redirect_url is not None:
            return False
        if not 200 <= response.status < 300:
            return False
        return len(response.content) <= self._CACHE_MAX_BODY

    def _fetch(self, url: str, method: str, stream: bool) -> ProxyResponse:
        """Perform one upstream request and convert the outcome to a ProxyResponse.

        Never raises: timeouts become a 504 response and any other exception
        becomes a 500 response, both with ``error`` set.
        """
        try:
            response = self.session.request(
                method=method,
                url=url,
                timeout=Config.TIMEOUT,
                allow_redirects=False,
                stream=stream,
            )
            headers = dict(response.headers)

            # Redirects are handed back to the caller instead of being followed.
            if response.is_redirect:
                # Location may be relative; resolve it against the request URL.
                redirect_url = urljoin(url, response.headers["Location"])
                # The body is not needed; release the connection right away.
                response.close()
                if self.check_url(redirect_url):
                    # Keep GitHub-to-GitHub redirects going through this proxy.
                    redirect_url = Config.PREFIX + redirect_url
                return ProxyResponse(
                    status=response.status_code,
                    content="",
                    headers=headers,
                    redirect_url=redirect_url,
                )

            # Streaming: return the live response so the caller can iterate it.
            if stream:
                return ProxyResponse(
                    status=response.status_code,
                    content=response,
                    headers=headers,
                )

            # Buffered: decode to text when the content type looks textual.
            content_type = response.headers.get("content-type", "").lower()
            is_text = any(marker in content_type for marker in ("text", "json", "xml", "html"))
            return ProxyResponse(
                status=response.status_code,
                content=response.text if is_text else response.content,
                headers=headers,
            )
        except requests.Timeout:
            return ProxyResponse(
                status=504,
                content="Request Timeout",
                headers={},
                error="请求超时",
            )
        except Exception as e:
            logging.error("Fetch error: %s", e)
            return ProxyResponse(
                status=500,
                content=str(e),
                headers={},
                error="服务器内部错误",
            )

    def fetch_github_content(self, url: str, method: str = "GET", stream: bool = False) -> ProxyResponse:
        """Fetch ``url`` from upstream, using the TTL cache for buffered requests.

        Args:
            url: Absolute URL to request.
            method: HTTP method to use.
            stream: When True, ``content`` of the result is the live
                ``requests`` response (to be iterated and closed by the
                caller) and the cache is bypassed.

        Returns:
            A ProxyResponse; transport failures are reported through its
            ``error`` field rather than raised.
        """
        if stream:
            return self._fetch(url, method, stream=True)

        verb = method.upper()
        cache_allowed = verb in ("GET", "HEAD")
        key = (verb, url)
        if cache_allowed:
            cached = self._cache_get(key)
            if cached is not None:
                return cached

        result = self._fetch(url, method, stream=False)
        if cache_allowed and self._is_cacheable(result):
            self._cache_put(key, result)
        return result

    def proxy_request(self, url: str, request: Request) -> Dict[str, Any]:
        """Validate and execute a proxy request on behalf of ``request``'s client.

        Args:
            url: Target URL, with or without scheme.
            request: Incoming request; only ``request.client.host`` is read.

        Returns:
            A dict with ``status``, ``content`` and ``headers``, plus
            ``error`` on failure, ``redirect_url`` when the client should be
            redirected, ``rate_limit`` on a 429 and ``timestamp`` when the
            upstream was contacted.
        """
        # The client address can be missing, e.g. for some server transports.
        client = getattr(request, "client", None)
        client_ip = client.host if client is not None else "unknown"
        # Stray whitespace or newlines would otherwise slip past the patterns.
        url = (url or "").strip()

        logging.info(f"Proxy request from {client_ip} to {url}")

        # 1. Rate limiting.
        if not self.rate_limiter.is_allowed(client_ip):
            return {
                "status": 429,
                "content": "Too Many Requests",
                "headers": {},
                "error": "请求过于频繁,请稍后再试",
                "rate_limit": {
                    "remaining": self.rate_limiter.get_remaining(client_ip),
                    "reset": int((time.time() * 1000 + Config.RATE_LIMIT["window_ms"]) / 1000),
                },
            }

        # 2. White list.
        if not self.check_white_list(url):
            return {
                "status": 403,
                "content": "Access Denied",
                "headers": {},
                "error": "访问被拒绝",
            }

        # 3. Default to HTTPS when no scheme was given.
        if not url.startswith(("http://", "https://")):
            url = "https://" + url

        # 4. Only the supported GitHub URL families are proxied.
        if not self.check_url(url):
            return {
                "status": 400,
                "content": "Invalid GitHub URL",
                "headers": {},
                "error": "无效的GitHub URL",
            }

        # 5. Optional CDN shortcut for blob URLs.
        if Config.JSDELIVR and re.search(Config.PATTERNS["blob"], url):
            url = url.replace("/blob/", "@").replace("github.com", "cdn.jsdelivr.net/gh")
            return {
                "status": 302,
                "content": "",
                "headers": {"Location": url},
                "redirect_url": url,
            }

        # 6. Fetch (buffered) and translate the result.
        response = self.fetch_github_content(url)
        result = {
            "status": response.status,
            "content": response.content,
            "headers": response.headers,
            "timestamp": datetime.now().isoformat(),
        }
        if response.redirect_url:
            result["redirect_url"] = response.redirect_url
        if response.error:
            result["error"] = response.error
        return result
|
|
|
|
|
# FastAPI application that hosts both the proxy route and the Gradio UI.
api = FastAPI()

# Apply the CORS policy declared in Config.CORS instead of a second,
# hard-coded copy. Credentials are deliberately not enabled: the policy allows
# every origin ("*"), and wildcard origins must not be combined with
# credentialed requests.
api.add_middleware(CORSMiddleware, **Config.CORS)
|
|
|
def create_interface():
    """Build the Gradio UI, mount it under ``/ui`` and register the proxy route.

    The UI is mounted before the catch-all ``/{path:path}`` route is added.
    Routes are matched in registration order, so registering the catch-all
    first would swallow every GET request meant for the UI.

    Returns:
        The module-level FastAPI application ``api``.
    """
    proxy = GitHubProxy()

    # Hop-by-hop headers describe one connection and must not be relayed.
    hop_by_hop = frozenset({
        "connection",
        "keep-alive",
        "proxy-authenticate",
        "proxy-authorization",
        "te",
        "trailers",
        "transfer-encoding",
        "upgrade",
    })

    def forwardable_headers(upstream_headers):
        """Return the upstream headers that are still valid for the relayed body."""
        dropped = set(hop_by_hop)
        # iter_content() yields the decoded body, so the upstream encoding
        # header (and the length of the encoded body) no longer apply.
        dropped.add("content-encoding")
        if any(name.lower() == "content-encoding" for name in upstream_headers):
            dropped.add("content-length")
        return {
            name: value
            for name, value in upstream_headers.items()
            if name.lower() not in dropped
        }

    def iter_and_close(upstream):
        """Yield the upstream body in chunks and always release the connection."""
        try:
            yield from upstream.iter_content(chunk_size=8192)
        finally:
            upstream.close()

    with gr.Blocks(title="GitHub Proxy", theme=gr.themes.Soft()) as blocks:
        gr.Markdown("""
        # 🚀 GitHub Proxy

        ### 使用方法
        1. 直接访问: `https://your-domain.com/github-url`
        2. 或者在下方输入GitHub URL进行测试

        ### 功能特点
        - ✨ 支持多种GitHub URL格式
        - 🔄 自动处理重定向
        - 💾 响应缓存
        - ⚡ CDN加速支持
        - 🛡️ 请求频率限制

        ### 支持的URL类型
        - GitHub Release/Archive
        - GitHub Raw/Blob
        - GitHub Gist
        - Raw GitHub Content
        """)

        with gr.Row():
            url_input = gr.Textbox(
                label="GitHub URL",
                placeholder="输入GitHub URL,例如:github.com/user/repo/blob/master/file.txt",
                scale=4,
            )
            submit_btn = gr.Button("获取内容", scale=1)

        with gr.Row():
            with gr.Column():
                status = gr.Textbox(label="状态")
                headers = gr.JSON(label="响应头")
            with gr.Column():
                content = gr.Textbox(label="内容", max_lines=20)
                error = gr.Textbox(label="错误信息", visible=False)

        def handle_request(url: str, request: gr.Request):
            """Run one proxy request from the UI and map the result onto the widgets."""
            result = proxy.proxy_request(url, request)
            error_msg = result.get("error", "")

            body = result["content"]
            if isinstance(body, (bytes, bytearray)):
                # A text box cannot show raw bytes meaningfully; summarise them.
                body = f"[二进制内容,共 {len(body)} 字节]"

            return {
                status: f"状态码: {result['status']}",
                headers: result["headers"],
                content: body,
                # Show the error box only when the request actually failed.
                error: gr.update(visible="error" in result, value=error_msg),
            }

        submit_btn.click(
            fn=handle_request,
            inputs=[url_input],
            outputs=[status, headers, content, error],
        )

        gr.Examples(
            examples=[
                ["github.com/microsoft/vscode/blob/main/README.md"],
                ["raw.githubusercontent.com/microsoft/vscode/main/README.md"],
                ["github.com/ollama/ollama/releases/download/v0.5.1/ollama-windows-amd64.zip"],
            ],
            inputs=url_input,
        )

    blocks.queue()
    # A Blocks object is not an ASGI application; mount_gradio_app wraps it in
    # one and mounts that on `api`.
    gr.mount_gradio_app(api, blocks, path="/ui")

    # Declared with `def` (not `async def`) because the handler performs
    # blocking HTTP calls; FastAPI then runs it in a worker thread instead of
    # stalling the event loop.
    @api.get("/{path:path}")
    def proxy_download(request: Request, path: str):
        """Proxy ``/<github-url>`` and stream the upstream body to the client."""
        if not path:
            return JSONResponse({"error": "无效的请求路径"}, status_code=400)

        # Intermediaries may collapse "https://" in the path to "https:/";
        # restore the double slash and normalise the scheme to lower case.
        path = re.sub(
            r"^(https?):/+",
            lambda match: match.group(1).lower() + "://",
            path,
            flags=re.I,
        )
        if not path.startswith(("http://", "https://")):
            path = "https://" + path

        # The path parameter excludes the query string; re-attach it.
        if request.url.query:
            path = f"{path}?{request.url.query}"

        try:
            # Validation, rate limiting and redirect detection.
            # NOTE(review): this also downloads the full body once before the
            # streaming fetch below; avoiding that needs a validation-only
            # entry point on GitHubProxy.
            response = proxy.proxy_request(path, request)

            if "error" in response:
                return JSONResponse({"error": response["error"]}, status_code=response["status"])

            if "redirect_url" in response:
                return RedirectResponse(response["redirect_url"])

            proxy_response = proxy.fetch_github_content(path, stream=True)
            if proxy_response.error:
                return JSONResponse({"error": proxy_response.error}, status_code=proxy_response.status)
            # The upstream may answer differently on the second request.
            if proxy_response.redirect_url:
                return RedirectResponse(proxy_response.redirect_url)

            return StreamingResponse(
                iter_and_close(proxy_response.content),
                headers=forwardable_headers(proxy_response.headers),
                status_code=proxy_response.status,
            )
        except Exception as e:
            logging.error(f"Proxy error: {str(e)}")
            return JSONResponse({"error": f"代理请求失败: {str(e)}"}, status_code=500)

    return api
|
|
|
if __name__ == "__main__":
    # Script entry point: build the application and serve it with uvicorn on
    # every network interface, port 7860.
    import uvicorn

    app = create_interface()
    uvicorn.run(app, port=7860, host="0.0.0.0")
|
|