"""Gradio front end for fetching GitHub content through a small proxy layer."""

import logging
import re
import threading
import time
from collections import OrderedDict
from dataclasses import dataclass
from datetime import datetime
from functools import lru_cache  # noqa: F401  (kept: file-level import)
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import urljoin, urlparse  # noqa: F401  (kept: file-level import)

import gradio as gr
import requests

# Logging configuration
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)


@dataclass
class ProxyResponse:
    """Result of one upstream fetch.

    ``redirect_url`` is set when the upstream answered with a redirect;
    ``error`` is set when the fetch itself failed (timeout or exception).
    """
    status: int
    content: str
    headers: Dict[str, str]
    redirect_url: Optional[str] = None
    error: Optional[str] = None


class Config:
    """Static configuration for the proxy."""
    ASSET_URL = "https://1pages.nbid.bid/"
    PREFIX = "/"
    JSDELIVR = 0
    CACHE_TTL = 3600  # seconds a successfully fetched response stays cached
    MAX_RETRIES = 3
    TIMEOUT = 10
    RATE_LIMIT = {
        "window_ms": 15 * 60 * 1000,  # 15 minutes
        "max": 100  # at most 100 requests per IP within the window
    }
    # Allow-list of substrings; an empty list allows every URL.
    WHITE_LIST: List[str] = []

    # Default request headers sent upstream
    DEFAULT_HEADERS = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    # CORS settings
    CORS = {
        "allow_origins": ["*"],
        "allow_methods": ["GET", "POST", "OPTIONS"],
        "allow_headers": ["*"],
        "max_age": 1728000
    }

    # URL patterns accepted by the proxy
    PATTERNS = {
        "releases": r"^(?:https?:\/\/)?github\.com\/.+?\/.+?\/(?:releases|archive)\/.*$",
        "blob": r"^(?:https?:\/\/)?github\.com\/.+?\/.+?\/(?:blob|raw)\/.*$",
        "git": r"^(?:https?:\/\/)?github\.com\/.+?\/.+?\/(?:info|git-).*$",
        "raw": r"^(?:https?:\/\/)?raw\.(?:githubusercontent|github)\.com\/.+?\/.+?\/.+?\/.+$",
        "gist": r"^(?:https?:\/\/)?gist\.(?:githubusercontent|github)\.com\/.+?\/.+?\/.+$",
        "tags": r"^(?:https?:\/\/)?github\.com\/.+?\/.+?\/tags.*$"
    }


class RateLimiter:
    """Sliding-window request limiter keyed by client IP."""

    def __init__(self):
        # ip -> timestamps (milliseconds) of the requests accepted in the window
        self.request_records: Dict[str, List[float]] = {}

    def _prune(self, ip: str, now: float) -> List[float]:
        """Drop timestamps older than the window and return the stored list."""
        window_start = now - Config.RATE_LIMIT["window_ms"]
        recent = [t for t in self.request_records.get(ip, []) if t > window_start]
        self.request_records[ip] = recent
        return recent

    def is_allowed(self, ip: str) -> bool:
        """Record a request for *ip* and return whether it is within the limit.

        A rejected request is not recorded.
        """
        now = time.time() * 1000
        recent = self._prune(ip, now)
        if len(recent) >= Config.RATE_LIMIT["max"]:
            return False
        recent.append(now)
        return True

    def get_remaining(self, ip: str) -> int:
        """Return how many more requests *ip* may make in the current window.

        Expired timestamps are discarded first so the figure reflects the
        live window, and the result never drops below zero.
        """
        if ip not in self.request_records:
            return Config.RATE_LIMIT["max"]
        recent = self._prune(ip, time.time() * 1000)
        if not recent:
            # Nothing left in the window: forget the IP instead of keeping
            # an empty list around.
            del self.request_records[ip]
        return max(0, Config.RATE_LIMIT["max"] - len(recent))


class GitHubProxy:
    """Core proxy: validates URLs, rate-limits clients and fetches content."""

    # Upper bound on cached responses; least recently used entries go first.
    _CACHE_MAX_ENTRIES = 1000

    def __init__(self):
        self.rate_limiter = RateLimiter()
        self.session = requests.Session()
        self.session.headers.update(Config.DEFAULT_HEADERS)
        # (method, url) -> (monotonic expiry time, response)
        self._cache: "OrderedDict[Tuple[str, str], Tuple[float, ProxyResponse]]" = OrderedDict()
        # Guards the cache, which may be touched from several worker threads.
        self._cache_lock = threading.Lock()

    def check_url(self, url: str) -> bool:
        """Return True when *url* matches any pattern in ``Config.PATTERNS``."""
        return any(re.search(pattern, url, re.I) for pattern in Config.PATTERNS.values())

    def check_white_list(self, url: str) -> bool:
        """Return True when the allow-list is empty or one entry occurs in *url*."""
        if not Config.WHITE_LIST:
            return True
        return any(white_item in url for white_item in Config.WHITE_LIST)

    def _cache_get(self, key: Tuple[str, str]) -> Optional[ProxyResponse]:
        """Return the cached response for *key*, or None if absent or expired."""
        with self._cache_lock:
            entry = self._cache.get(key)
            if entry is None:
                return None
            expires_at, response = entry
            if time.monotonic() >= expires_at:
                del self._cache[key]
                return None
            self._cache.move_to_end(key)
            return response

    def _cache_put(self, key: Tuple[str, str], response: ProxyResponse) -> None:
        """Store *response* for ``Config.CACHE_TTL`` seconds, evicting old entries."""
        with self._cache_lock:
            self._cache[key] = (time.monotonic() + Config.CACHE_TTL, response)
            self._cache.move_to_end(key)
            while len(self._cache) > self._CACHE_MAX_ENTRIES:
                self._cache.popitem(last=False)

    def _fetch_uncached(self, url: str, method: str) -> ProxyResponse:
        """Perform the upstream request and translate it into a ProxyResponse."""
        try:
            response = self.session.request(
                method=method,
                url=url,
                timeout=Config.TIMEOUT,
                allow_redirects=False
            )
            headers = dict(response.headers)

            # Redirects are reported to the caller rather than followed.
            if response.is_redirect:
                redirect_url = response.headers["Location"]
                if self.check_url(redirect_url):
                    # Keep redirects to supported URLs behind the proxy prefix.
                    redirect_url = Config.PREFIX + redirect_url
                return ProxyResponse(
                    status=response.status_code,
                    content="",
                    headers=headers,
                    redirect_url=redirect_url
                )

            return ProxyResponse(
                status=response.status_code,
                content=response.text,
                headers=headers
            )
        except requests.Timeout:
            return ProxyResponse(
                status=504,
                content="Request Timeout",
                headers={},
                error="请求超时"
            )
        except Exception as e:
            # Boundary handler: any failure is reported as a 500 result
            # instead of propagating into the UI callback.
            logging.error("Fetch error: %s", e)
            return ProxyResponse(
                status=500,
                content=str(e),
                headers={},
                error="服务器内部错误"
            )

    def fetch_github_content(self, url: str, method: str = "GET") -> ProxyResponse:
        """Fetch *url* upstream, reusing a cached response when one is fresh.

        Responses are cached per instance for ``Config.CACHE_TTL`` seconds.
        Results carrying an ``error`` (timeouts, exceptions) are never cached,
        so a transient failure does not stick to the URL.

        Args:
            url: Absolute URL to request.
            method: HTTP method passed to the session.

        Returns:
            A ProxyResponse; failures are encoded in its ``status``/``error``
            fields rather than raised.
        """
        key = (method, url)
        cached = self._cache_get(key)
        if cached is not None:
            return cached

        response = self._fetch_uncached(url, method)
        if response.error is None:
            self._cache_put(key, response)
        return response

    @staticmethod
    def _client_ip(request: Optional[gr.Request]) -> str:
        """Return the caller's host, or "unknown" when it is not available."""
        client = getattr(request, "client", None)
        return getattr(client, "host", None) or "unknown"

    def proxy_request(self, url: str, request: gr.Request) -> Dict[str, Any]:
        """Validate and serve one proxy request.

        Args:
            url: URL typed by the user; the scheme may be omitted.
            request: Gradio request used to identify the client for logging
                and rate limiting.

        Returns:
            A dict with ``status``, ``content`` and ``headers`` keys, plus
            ``error``, ``redirect_url``, ``rate_limit`` or ``timestamp``
            depending on the outcome.
        """
        url = (url or "").strip()
        client_ip = self._client_ip(request)

        # Log the request
        logging.info("Proxy request from %s to %s", client_ip, url)

        # Rate limit check
        if not self.rate_limiter.is_allowed(client_ip):
            return {
                "status": 429,
                "content": "Too Many Requests",
                "headers": {},
                "error": "请求过于频繁,请稍后再试",
                "rate_limit": {
                    "remaining": self.rate_limiter.get_remaining(client_ip),
                    "reset": int((time.time() * 1000 + Config.RATE_LIMIT["window_ms"]) / 1000)
                }
            }

        # Allow-list check
        if not self.check_white_list(url):
            return {
                "status": 403,
                "content": "Access Denied",
                "headers": {},
                "error": "访问被拒绝"
            }

        # Default to https when no scheme was given
        if not url.startswith(("http://", "https://")):
            url = "https://" + url

        # Only supported GitHub URLs are proxied
        if not self.check_url(url):
            return {
                "status": 400,
                "content": "Invalid GitHub URL",
                "headers": {},
                "error": "无效的GitHub URL"
            }

        # Optional jsDelivr redirect.
        # NOTE(review): the "blob" pattern also matches /raw/ URLs, but only
        # the "/blob/" segment is rewritten here - confirm that is intended.
        if Config.JSDELIVR and re.search(Config.PATTERNS["blob"], url):
            url = url.replace("/blob/", "@").replace("github.com", "cdn.jsdelivr.net/gh")
            return {
                "status": 302,
                "content": "",
                "headers": {"Location": url},
                "redirect_url": url
            }

        # Fetch the content
        response = self.fetch_github_content(url)

        result = {
            "status": response.status,
            "content": response.content,
            "headers": response.headers,
            "timestamp": datetime.now().isoformat()
        }

        if response.redirect_url:
            result["redirect_url"] = response.redirect_url

        if response.error:
            result["error"] = response.error

        return result


# Introductory Markdown shown at the top of the page.
_INTRO_MARKDOWN = """
# 🚀 GitHub Proxy

### 功能特点
- ✨ 支持多种GitHub URL格式
- 🔄 自动处理重定向
- 💾 响应缓存
- ⚡ CDN加速支持
- 🛡️ 请求频率限制

### 支持的URL类型
- GitHub Release/Archive
- GitHub Raw/Blob
- GitHub Gist
- Raw GitHub Content
"""


def create_interface():
    """Build and return the Gradio Blocks application."""
    proxy = GitHubProxy()

    with gr.Blocks(title="GitHub Proxy", theme=gr.themes.Soft()) as app:
        gr.Markdown(_INTRO_MARKDOWN)

        with gr.Row():
            url_input = gr.Textbox(
                label="GitHub URL",
                placeholder="输入GitHub URL,例如:github.com/user/repo/blob/master/file.txt",
                scale=4
            )
            submit_btn = gr.Button("获取内容", scale=1)

        with gr.Row():
            with gr.Column():
                status = gr.Textbox(label="状态")
                headers = gr.JSON(label="响应头")
            with gr.Column():
                content = gr.Textbox(label="内容", max_lines=20)

        error = gr.Textbox(label="错误信息", visible=False)

        def handle_request(url: str, request: gr.Request):
            """Run the proxy for *url* and map the result onto the UI components."""
            result = proxy.proxy_request(url, request)

            # The error box is only shown when the result carries an error.
            error_visible = "error" in result
            error_msg = result.get("error", "")

            return {
                status: f"状态码: {result['status']}",
                headers: result["headers"],
                content: result["content"],
                error: gr.update(visible=error_visible, value=error_msg)
            }

        submit_btn.click(
            fn=handle_request,
            inputs=[url_input],
            outputs=[status, headers, content, error]
        )

        # Example inputs
        gr.Examples(
            examples=[
                ["github.com/microsoft/vscode/blob/main/README.md"],
                ["raw.githubusercontent.com/microsoft/vscode/main/README.md"],
                ["gist.github.com/username/gist_id/raw/file.txt"]
            ],
            inputs=url_input
        )

    return app


if __name__ == "__main__":
    app = create_interface()
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        quiet=False
    )