proxy / app.py
aigenai's picture
Create app.py
cb63927 verified
raw
history blame
9.74 kB
import gradio as gr
import requests
import re
import time
import logging
from functools import lru_cache
from urllib.parse import urlparse, urljoin
from typing import Dict, Any, Optional, List
from dataclasses import dataclass
from datetime import datetime
# Configure logging: INFO and above, formatted as "time - level - message".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
@dataclass
class ProxyResponse:
    """Outcome of a single upstream fetch performed by the proxy.

    Fields (in positional order):
        status: HTTP status code of the response.
        content: Response body as text.
        headers: Response headers as a plain dictionary.
        redirect_url: Redirect target, when there is one; otherwise ``None``.
        error: Error message, when there is one; otherwise ``None``.
    """

    status: int
    content: str
    headers: Dict[str, str]
    redirect_url: Optional[str] = None
    error: Optional[str] = None
class Config:
    """Static configuration for the proxy, exposed as class-level constants."""

    ASSET_URL = "https://1pages.nbid.bid/"
    PREFIX = "/"      # prepended to redirect targets that match a GitHub pattern
    JSDELIVR = 0      # truthy value turns on the jsDelivr redirect for blob URLs
    CACHE_TTL = 3600
    MAX_RETRIES = 3
    TIMEOUT = 10      # passed as the ``timeout`` of each upstream request

    # Per-IP request throttling.
    RATE_LIMIT = {
        "window_ms": 15 * 60 * 1000,  # 15 minutes
        "max": 100,                   # at most 100 requests per IP
    }

    # Whitelist of substrings; an empty list allows every URL.
    WHITE_LIST: List[str] = []

    # Headers sent with every upstream request.
    DEFAULT_HEADERS = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    }

    # CORS settings.
    CORS = {
        "allow_origins": ["*"],
        "allow_methods": ["GET", "POST", "OPTIONS"],
        "allow_headers": ["*"],
        "max_age": 1728000,
    }

    # Regular expressions describing the GitHub URL shapes the proxy accepts.
    PATTERNS = {
        "releases": r"^(?:https?:\/\/)?github\.com\/.+?\/.+?\/(?:releases|archive)\/.*$",
        "blob": r"^(?:https?:\/\/)?github\.com\/.+?\/.+?\/(?:blob|raw)\/.*$",
        "git": r"^(?:https?:\/\/)?github\.com\/.+?\/.+?\/(?:info|git-).*$",
        "raw": r"^(?:https?:\/\/)?raw\.(?:githubusercontent|github)\.com\/.+?\/.+?\/.+?\/.+$",
        "gist": r"^(?:https?:\/\/)?gist\.(?:githubusercontent|github)\.com\/.+?\/.+?\/.+$",
        "tags": r"^(?:https?:\/\/)?github\.com\/.+?\/.+?\/tags.*$",
    }
class RateLimiter:
    """Sliding-window request limiter keyed by client IP.

    Each IP maps to the list of timestamps (milliseconds since the epoch) of
    its accepted requests. A request is accepted while fewer than the maximum
    number of timestamps fall inside the current window.

    Args:
        window_ms: Window length in milliseconds. When ``None`` (the default)
            ``Config.RATE_LIMIT["window_ms"]`` is read on every call.
        max_requests: Maximum accepted requests per window. When ``None``
            (the default) ``Config.RATE_LIMIT["max"]`` is read on every call.
    """

    def __init__(
        self,
        window_ms: Optional[float] = None,
        max_requests: Optional[int] = None,
    ):
        self._window_ms = window_ms
        self._max_requests = max_requests
        self.request_records: Dict[str, List[float]] = {}

    @property
    def window_ms(self) -> float:
        """Effective window length in milliseconds."""
        if self._window_ms is None:
            return Config.RATE_LIMIT["window_ms"]
        return self._window_ms

    @property
    def max_requests(self) -> int:
        """Effective maximum number of requests per window."""
        if self._max_requests is None:
            return Config.RATE_LIMIT["max"]
        return self._max_requests

    def _live_timestamps(self, ip: str, now: float) -> List[float]:
        """Return the timestamps recorded for *ip* that are still inside the window."""
        window_start = now - self.window_ms
        return [t for t in self.request_records.get(ip, ()) if t > window_start]

    def is_allowed(self, ip: str) -> bool:
        """Record a request from *ip* and report whether it is within the limit.

        Expired timestamps are dropped on every call. A rejected request is
        not recorded, so it does not extend the time the client stays blocked.
        """
        now = time.time() * 1000
        recent = self._live_timestamps(ip, now)
        allowed = len(recent) < self.max_requests
        if allowed:
            recent.append(now)
        self.request_records[ip] = recent
        return allowed

    def get_remaining(self, ip: str) -> int:
        """Return how many more requests *ip* may make in the current window.

        Only timestamps still inside the window are counted, so the value is
        accurate even when ``is_allowed`` has not been called recently. This
        method does not modify the stored records.
        """
        now = time.time() * 1000
        used = len(self._live_timestamps(ip, now))
        return max(0, self.max_requests - used)
class GitHubProxy:
    """Core GitHub proxy: validates URLs, throttles clients and fetches content."""

    # Only idempotent methods may be answered from the response cache.
    _CACHEABLE_METHODS = ("GET", "HEAD")

    def __init__(self):
        self.rate_limiter = RateLimiter()
        self.session = requests.Session()
        self.session.headers.update(Config.DEFAULT_HEADERS)
        # The cache is built per instance instead of decorating the method:
        # a class-level lru_cache would key on ``self`` and keep every
        # instance alive for the lifetime of the class.
        self._cached_fetch = lru_cache(maxsize=1000)(self._fetch)

    def check_url(self, url: str) -> bool:
        """Return True when *url* matches one of ``Config.PATTERNS`` (case-insensitive)."""
        return any(
            re.search(pattern, url, re.I) for pattern in Config.PATTERNS.values()
        )

    def check_white_list(self, url: str) -> bool:
        """Return True when *url* contains a whitelist entry or the whitelist is empty."""
        if not Config.WHITE_LIST:
            return True
        return any(white_item in url for white_item in Config.WHITE_LIST)

    def _fetch(self, url: str, method: str, _ttl_bucket: int) -> ProxyResponse:
        """Perform one upstream request and wrap the HTTP response.

        ``_ttl_bucket`` is unused in the body; it only takes part in the cache
        key so that cached entries stop matching once the bucket changes.
        Transport errors propagate to the caller, which means lru_cache never
        stores them.
        """
        response = self.session.request(
            method=method,
            url=url,
            timeout=Config.TIMEOUT,
            allow_redirects=False,
        )
        headers = dict(response.headers)

        # Handle redirects: hand the target back instead of following it.
        if response.is_redirect:
            # urljoin leaves absolute targets untouched and resolves relative ones.
            redirect_url = urljoin(url, response.headers["Location"])
            if self.check_url(redirect_url):
                redirect_url = Config.PREFIX + redirect_url
            return ProxyResponse(
                status=response.status_code,
                content="",
                headers=headers,
                redirect_url=redirect_url,
            )

        return ProxyResponse(
            status=response.status_code,
            content=response.text,
            headers=headers,
        )

    def fetch_github_content(self, url: str, method: str = "GET") -> ProxyResponse:
        """Fetch *url* from upstream, serving repeated requests from a cache.

        Responses to GET/HEAD requests are cached for at most
        ``Config.CACHE_TTL`` (interpreted as seconds): the current time bucket
        is part of the cache key, so an entry is ignored once the bucket rolls
        over. Timeouts and other failures are never cached, so a transient
        error does not stick.

        Args:
            url: Absolute URL to request.
            method: HTTP method, ``"GET"`` by default.

        Returns:
            A ``ProxyResponse``. On timeout the status is 504, on any other
            exception 500; in both cases ``error`` is set.
        """
        verb = method.upper()
        ttl = Config.CACHE_TTL
        try:
            if ttl > 0 and verb in self._CACHEABLE_METHODS:
                return self._cached_fetch(url, verb, int(time.time() // ttl))
            return self._fetch(url, verb, 0)
        except requests.Timeout:
            return ProxyResponse(
                status=504,
                content="Request Timeout",
                headers={},
                error="请求超时",
            )
        except Exception as e:
            # Boundary of the proxy: report any failure as a 500 response
            # rather than letting it escape into the UI handler.
            logging.error("Fetch error: %s", e)
            return ProxyResponse(
                status=500,
                content=str(e),
                headers={},
                error="服务器内部错误",
            )

    def proxy_request(self, url: str, request: gr.Request) -> Dict[str, Any]:
        """Validate and serve one proxy request.

        Steps: rate-limit the client, check the whitelist, normalise the URL
        scheme, verify the URL is a supported GitHub URL, optionally redirect
        to jsDelivr, then fetch the content.

        Args:
            url: URL entered by the user; the scheme may be omitted.
            request: Gradio request, used to identify the client. When it or
                its ``client`` is missing, the client is tracked as
                ``"unknown"``.

        Returns:
            A dictionary with ``status``, ``content`` and ``headers``, plus
            ``error``, ``redirect_url``, ``rate_limit`` or ``timestamp``
            depending on the outcome.
        """
        url = (url or "").strip()
        client = getattr(request, "client", None)
        client_ip = getattr(client, "host", None) or "unknown"

        # Log the request.
        logging.info("Proxy request from %s to %s", client_ip, url)

        # Enforce the rate limit.
        if not self.rate_limiter.is_allowed(client_ip):
            return {
                "status": 429,
                "content": "Too Many Requests",
                "headers": {},
                "error": "请求过于频繁,请稍后再试",
                "rate_limit": {
                    "remaining": self.rate_limiter.get_remaining(client_ip),
                    "reset": int(
                        (time.time() * 1000 + Config.RATE_LIMIT["window_ms"]) / 1000
                    ),
                },
            }

        # Check the whitelist.
        if not self.check_white_list(url):
            return {
                "status": 403,
                "content": "Access Denied",
                "headers": {},
                "error": "访问被拒绝",
            }

        # Normalise the URL: default to https when no scheme was given.
        if not url.startswith(("http://", "https://")):
            url = "https://" + url

        # Reject anything that is not a supported GitHub URL.
        if not self.check_url(url):
            return {
                "status": 400,
                "content": "Invalid GitHub URL",
                "headers": {},
                "error": "无效的GitHub URL",
            }

        # Optional jsDelivr redirect for blob URLs.
        if Config.JSDELIVR and re.search(Config.PATTERNS["blob"], url):
            url = url.replace("/blob/", "@").replace("github.com", "cdn.jsdelivr.net/gh")
            return {
                "status": 302,
                "content": "",
                "headers": {"Location": url},
                "redirect_url": url,
            }

        # Fetch the content.
        response = self.fetch_github_content(url)
        result = {
            "status": response.status,
            "content": response.content,
            "headers": response.headers,
            "timestamp": datetime.now().isoformat(),
        }
        if response.redirect_url:
            result["redirect_url"] = response.redirect_url
        if response.error:
            result["error"] = response.error
        return result
def create_interface():
    """Build the Gradio interface.

    Creates one ``GitHubProxy`` and wires a URL textbox and a submit button to
    it; the result is shown as status, response headers, content and an error
    box that is only visible when the result carries an error.

    Returns:
        The ``gr.Blocks`` application, not yet launched.
    """
    proxy = GitHubProxy()

    with gr.Blocks(title="GitHub Proxy", theme=gr.themes.Soft()) as app:
        gr.Markdown("""
# 🚀 GitHub Proxy
### 功能特点
- ✨ 支持多种GitHub URL格式
- 🔄 自动处理重定向
- 💾 响应缓存
- ⚡ CDN加速支持
- 🛡️ 请求频率限制
### 支持的URL类型
- GitHub Release/Archive
- GitHub Raw/Blob
- GitHub Gist
- Raw GitHub Content
""")
        with gr.Row():
            url_input = gr.Textbox(
                label="GitHub URL",
                placeholder="输入GitHub URL,例如:github.com/user/repo/blob/master/file.txt",
                scale=4
            )
            submit_btn = gr.Button("获取内容", scale=1)

        with gr.Row():
            with gr.Column():
                status = gr.Textbox(label="状态")
                headers = gr.JSON(label="响应头")
            with gr.Column():
                content = gr.Textbox(label="内容", max_lines=20)
                error = gr.Textbox(label="错误信息", visible=False)

        def handle_request(url: str, request: gr.Request):
            """Run the proxy for *url* and map the result onto the output components."""
            result = proxy.proxy_request(url, request)

            # Update the UI; the error box is shown only when an error is present.
            error_visible = "error" in result
            error_msg = result.get("error", "")
            return {
                status: f"状态码: {result['status']}",
                headers: result["headers"],
                content: result["content"],
                # Single entry for the error box: a second ``error`` key in the
                # same dict literal would silently overwrite the first.
                error: gr.update(visible=error_visible, value=error_msg),
            }

        submit_btn.click(
            fn=handle_request,
            inputs=[url_input],
            outputs=[status, headers, content, error]
        )

        # Add examples.
        gr.Examples(
            examples=[
                ["github.com/microsoft/vscode/blob/main/README.md"],
                ["raw.githubusercontent.com/microsoft/vscode/main/README.md"],
                ["gist.github.com/username/gist_id/raw/file.txt"]
            ],
            inputs=url_input
        )

    return app
if __name__ == "__main__":
    # Script entry point: build the UI and serve it on all interfaces, port 7860.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
        "quiet": False,
    }
    app = create_interface()
    app.launch(**launch_options)