|
import gradio as gr |
|
import requests |
|
import re |
|
import time |
|
import logging |
|
from functools import lru_cache |
|
from urllib.parse import urlparse, urljoin |
|
from typing import Dict, Any, Optional, List |
|
from dataclasses import dataclass |
|
from datetime import datetime |
|
from fastapi import FastAPI, Request |
|
from fastapi.responses import StreamingResponse, RedirectResponse, JSONResponse |
|
from fastapi.middleware.cors import CORSMiddleware |
|
|
|
|
|
# Configure the root logger once at import time: INFO and above, with each
# record rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
|
|
|
@dataclass
class ProxyResponse:
    """Outcome of a single upstream fetch made by the proxy.

    Field order is part of the constructor contract (instances are built both
    positionally and by keyword), so it must not be rearranged.
    """

    # HTTP status code of the upstream reply, or a synthetic 5xx code when the
    # fetch itself failed.
    status: int
    # Payload: an empty string for redirects, the live upstream response object
    # when streaming was requested, otherwise the decoded text or raw bytes.
    content: Any
    # Upstream response headers copied into a plain dict (empty on failure).
    headers: Dict[str, str]
    # Target to redirect the client to; None when the reply is not a redirect.
    redirect_url: Optional[str] = None
    # Human-readable error message; None when the fetch succeeded.
    error: Optional[str] = None
|
|
|
class Config:
    """Static configuration for the GitHub proxy (class attributes only)."""

    # Asset base URL. NOTE(review): not referenced by the visible code - confirm it is still needed.
    ASSET_URL = "https://1pages.nbid.bid/"
    # Prepended to redirect targets that are themselves proxyable GitHub URLs.
    PREFIX = "/"
    # Truthy -> blob URLs are redirected to the jsDelivr CDN instead of being proxied.
    JSDELIVR = 0
    # Cache lifetime. NOTE(review): presumably seconds; confirm against the consumer.
    CACHE_TTL = 3600
    # Retry budget. NOTE(review): not referenced by the visible code.
    MAX_RETRIES = 3
    # Timeout handed to every upstream request.
    TIMEOUT = 20
    # Chunk size used when streaming an upstream body to the client (1 MiB).
    CHUNK_SIZE = 1024 ** 2
    # Bodies whose declared length exceeds this are redirected, not proxied (99 GiB).
    SIZE_LIMIT = 99 * 1024 ** 3

    # Per-client sliding-window rate limit: at most "max" requests per "window_ms".
    RATE_LIMIT = {
        "window_ms": 15 * 60 * 1000,
        "max": 200,
    }

    # Access lists of "author/repo" entries, where "*" acts as a wildcard.
    # An empty WHITE_LIST means every repository is allowed.
    WHITE_LIST: List[str] = []
    BLACK_LIST: List[str] = []
    # Repositories that are always served through jsDelivr.
    PASS_LIST: List[str] = []

    # Headers installed on the shared upstream HTTP session.
    DEFAULT_HEADERS = {
        "User-Agent": "git/2.41.0",
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br",
    }

    # CORS policy settings.
    CORS = {
        "allow_origins": ["*"],
        "allow_methods": ["GET", "POST", "OPTIONS", "HEAD"],
        "allow_headers": ["*"],
        "max_age": 1728000,
    }

    # Recognised GitHub URL shapes. Insertion order matters because callers use
    # the first pattern that matches. Every pattern captures "author"; all but
    # "gist" also capture "repo".
    PATTERNS = {
        # Release downloads and source archives.
        "releases": r"^(?:https?:\/\/)?github\.com\/(?P<author>.+?)\/(?P<repo>.+?)\/(?:releases|archive)\/.*$",
        # Files viewed through /blob/ or /raw/.
        "blob": r"^(?:https?:\/\/)?github\.com\/(?P<author>.+?)\/(?P<repo>.+?)\/(?:blob|raw)\/.*$",
        # Git smart-HTTP endpoints (info/refs?service=git-*, git-upload-pack, ...).
        "git": r"^(?:https?:\/\/)?github\.com\/(?P<author>.+?)\/(?P<repo>.+?)\/(?:info\/refs\?service=)?git-.*$",
        # raw.githubusercontent.com / raw.github.com content.
        "raw": r"^(?:https?:\/\/)?raw\.(?:githubusercontent|github)\.com\/(?P<author>.+?)\/(?P<repo>.+?)\/.+?\/.+$",
        # Gist content (no repo component).
        "gist": r"^(?:https?:\/\/)?gist\.(?:githubusercontent|github)\.com\/(?P<author>.+?)\/.+?\/.+$",
        # Tag listings.
        "tags": r"^(?:https?:\/\/)?github\.com\/(?P<author>.+?)\/(?P<repo>.+?)\/tags.*$",
    }
|
|
|
class RateLimiter:
    """Sliding-window request rate limiter keyed by client IP.

    Each IP maps to the millisecond timestamps of its accepted requests; a
    request is accepted while fewer than the configured maximum fall inside
    the current window.

    Args:
        window_ms: Window length in milliseconds. ``None`` (the default) reads
            ``Config.RATE_LIMIT["window_ms"]`` on every call, so later changes
            to the configuration are honoured.
        max_requests: Maximum accepted requests per window. ``None`` (the
            default) reads ``Config.RATE_LIMIT["max"]`` on every call.
    """

    def __init__(self, window_ms: Optional[float] = None, max_requests: Optional[int] = None):
        # ip -> timestamps (ms since the epoch) of accepted requests.
        self.request_records: Dict[str, List[float]] = {}
        self._window_ms = window_ms
        self._max_requests = max_requests

    def _limits(self):
        """Return the effective ``(window_ms, max_requests)`` pair."""
        window_ms = Config.RATE_LIMIT["window_ms"] if self._window_ms is None else self._window_ms
        max_requests = Config.RATE_LIMIT["max"] if self._max_requests is None else self._max_requests
        return window_ms, max_requests

    def _recent(self, ip: str, now: float, window_ms: float) -> List[float]:
        """Return the timestamps recorded for ``ip`` that are still inside the window."""
        window_start = now - window_ms
        return [t for t in self.request_records.get(ip, ()) if t > window_start]

    def is_allowed(self, ip: str) -> bool:
        """Record a request from ``ip`` and report whether it is within the limit.

        Expired timestamps are pruned on every call. A rejected request is not
        recorded, so it does not extend the time the client stays blocked.
        """
        now = time.time() * 1000
        window_ms, max_requests = self._limits()

        recent = self._recent(ip, now, window_ms)
        allowed = len(recent) < max_requests
        if allowed:
            recent.append(now)
        self.request_records[ip] = recent
        return allowed

    def get_remaining(self, ip: str) -> int:
        """Return how many more requests ``ip`` may make in the current window.

        Only timestamps still inside the window are counted (stale entries used
        to be included, under-reporting the quota), and the result never drops
        below zero. The stored records are not modified.
        """
        now = time.time() * 1000
        window_ms, max_requests = self._limits()
        used = len(self._recent(ip, now, window_ms))
        return max(0, max_requests - used)
|
|
|
class GitHubProxy:
    """Core GitHub proxy: validates URLs, rate-limits clients and fetches upstream content.

    Successful non-streaming GET/HEAD responses are kept in a small per-instance
    cache for ``Config.CACHE_TTL`` seconds. Streaming responses, redirects and
    failures are never cached: a streamed body can only be consumed once,
    redirect targets may be short-lived, and errors should be retried.
    """

    # Upper bound on the number of cached responses.
    _CACHE_MAX_ENTRIES = 1000

    def __init__(self):
        self.rate_limiter = RateLimiter()
        self.session = requests.Session()
        self.session.headers.update(Config.DEFAULT_HEADERS)
        # (METHOD, url) -> (expiry on the monotonic clock, ProxyResponse)
        self._cache: Dict[Any, Any] = {}

    def check_url(self, url: str) -> bool:
        """Return True if ``url`` matches any supported GitHub URL pattern (case-insensitive)."""
        return any(re.search(pattern, url, re.I) for pattern in Config.PATTERNS.values())

    def check_white_list(self, url: str) -> bool:
        """Return True if ``url`` passes the whitelist.

        An empty whitelist allows everything. Entries are matched as plain
        substrings of the URL; ``*`` is not interpreted as a wildcard here.
        """
        if not Config.WHITE_LIST:
            return True
        return any(white_item in url for white_item in Config.WHITE_LIST)

    def _cache_lookup(self, key) -> Optional[ProxyResponse]:
        """Return the unexpired cached response for ``key``, dropping it if it has expired."""
        entry = self._cache.get(key)
        if entry is None:
            return None
        expires_at, cached = entry
        if expires_at <= time.monotonic():
            self._cache.pop(key, None)
            return None
        return cached

    def _cache_store(self, key, response: ProxyResponse) -> None:
        """Cache ``response`` under ``key``, evicting expired and then oldest entries when full."""
        now = time.monotonic()
        if key not in self._cache and len(self._cache) >= self._CACHE_MAX_ENTRIES:
            for stale in [k for k, (expiry, _) in self._cache.items() if expiry <= now]:
                self._cache.pop(stale, None)
            while len(self._cache) >= self._CACHE_MAX_ENTRIES:
                # dicts preserve insertion order, so the first key is the oldest.
                self._cache.pop(next(iter(self._cache)), None)
        self._cache[key] = (now + Config.CACHE_TTL, response)

    def _fetch_upstream(self, url: str, method: str, stream: bool) -> ProxyResponse:
        """Perform one upstream request and translate the outcome into a ProxyResponse."""
        try:
            response = self.session.request(
                method=method,
                url=url,
                timeout=Config.TIMEOUT,
                # Redirects are surfaced to the caller instead of being followed.
                allow_redirects=False,
                stream=stream,
            )
            headers = dict(response.headers)

            if response.is_redirect:
                redirect_url = response.headers["Location"]
                # Keep the client on the proxy when the target is itself proxyable.
                if self.check_url(redirect_url):
                    redirect_url = Config.PREFIX + redirect_url
                return ProxyResponse(
                    status=response.status_code,
                    content="",
                    headers=headers,
                    redirect_url=redirect_url,
                )

            if stream:
                # Hand back the live response; the caller iterates (and closes) it.
                return ProxyResponse(
                    status=response.status_code,
                    content=response,
                    headers=headers,
                )

            content_type = response.headers.get('content-type', '')
            is_binary = not any(
                text_type in content_type.lower()
                for text_type in ['text', 'json', 'xml', 'html']
            )
            content = response.content if is_binary else response.text

            return ProxyResponse(
                status=response.status_code,
                content=content,
                headers=headers,
            )

        except requests.Timeout:
            return ProxyResponse(
                status=504,
                content="Request Timeout",
                headers={},
                error="请求超时",
            )
        except Exception as e:
            logging.error(f"Fetch error: {str(e)}")
            return ProxyResponse(
                status=500,
                content=str(e),
                headers={},
                error="服务器内部错误",
            )

    def fetch_github_content(self, url: str, method: str = "GET", stream: bool = False) -> ProxyResponse:
        """Fetch ``url`` from upstream, serving cacheable requests from the TTL cache.

        Args:
            url: Absolute upstream URL.
            method: HTTP method to use.
            stream: When True the returned ``content`` is the live upstream
                response object, to be streamed by the caller.

        Returns:
            A ProxyResponse. Failures are reported through ``error`` with a
            504 (timeout) or 500 (anything else) status rather than raised.
        """
        verb = method.upper()
        cacheable = not stream and verb in ("GET", "HEAD")
        cache_key = (verb, url)

        if cacheable:
            cached = self._cache_lookup(cache_key)
            if cached is not None:
                return cached

        result = self._fetch_upstream(url, method, stream)

        if (
            cacheable
            and result.error is None
            and result.redirect_url is None
            and 200 <= result.status < 300
        ):
            self._cache_store(cache_key, result)
        return result

    def proxy_request(self, url: str, request: Request) -> Dict[str, Any]:
        """Validate and execute one proxy request.

        Args:
            url: Target GitHub URL; ``https://`` is assumed when no scheme is given.
            request: Incoming request, used only for the client address.

        Returns:
            A dict with ``status``, ``content`` and ``headers``. Depending on
            the outcome it also carries ``error``, ``redirect_url``,
            ``rate_limit`` and/or ``timestamp``.
        """
        # ``client`` can be None (for example behind some ASGI servers or in tests).
        client = getattr(request, "client", None)
        client_ip = client.host if client else "unknown"

        logging.info(f"Proxy request from {client_ip} to {url}")

        if not self.rate_limiter.is_allowed(client_ip):
            return {
                "status": 429,
                "content": "Too Many Requests",
                "headers": {},
                "error": "请求过于频繁,请稍后再试",
                "rate_limit": {
                    "remaining": self.rate_limiter.get_remaining(client_ip),
                    "reset": int((time.time() * 1000 + Config.RATE_LIMIT["window_ms"]) / 1000),
                },
            }

        if not self.check_white_list(url):
            return {
                "status": 403,
                "content": "Access Denied",
                "headers": {},
                "error": "访问被拒绝",
            }

        if not url.startswith(("http://", "https://")):
            url = "https://" + url

        if not self.check_url(url):
            return {
                "status": 400,
                "content": "Invalid GitHub URL",
                "headers": {},
                "error": "无效的GitHub URL",
            }

        if Config.JSDELIVR and re.search(Config.PATTERNS["blob"], url):
            # Rewrite only the first occurrence so paths that happen to contain
            # "/blob/" or "github.com" further along are left intact.
            url = url.replace("/blob/", "@", 1).replace("github.com", "cdn.jsdelivr.net/gh", 1)
            return {
                "status": 302,
                "content": "",
                "headers": {"Location": url},
                "redirect_url": url,
            }

        response = self.fetch_github_content(url)
        result = {
            "status": response.status,
            "content": response.content,
            "headers": response.headers,
            "timestamp": datetime.now().isoformat(),
        }

        if response.redirect_url:
            result["redirect_url"] = response.redirect_url
        if response.error:
            result["error"] = response.error

        return result
|
|
|
|
|
# Shared FastAPI application; create_interface() attaches the proxy route and the UI to it.
api = FastAPI()

# The CORS policy is taken from Config.CORS instead of being hard-coded here.
# Credentials are disabled: the proxy does not use cookies or authentication,
# and the CORS rules do not permit credentialed requests together with a
# wildcard ("*") origin, method or header list.
api.add_middleware(
    CORSMiddleware,
    allow_credentials=False,
    **Config.CORS,
)
|
|
|
def create_interface():
    """Build the application: the Gradio test UI under ``/ui`` plus the catch-all proxy route.

    The UI is mounted before the catch-all ``/{path:path}`` route is
    registered. Routes are matched in registration order, so registering the
    catch-all first would swallow every GET request meant for ``/ui``.

    Returns:
        The FastAPI application with the UI mounted and the proxy route added.
    """
    proxy = GitHubProxy()

    # Upstream headers that must not be forwarded verbatim: requests decodes
    # the body while iterating, so the upstream encoding and length no longer
    # describe the bytes we send, and the rest are hop-by-hop headers.
    unforwardable = {
        'content-encoding',
        'content-length',
        'transfer-encoding',
        'connection',
        'keep-alive',
    }

    def _listed(entries, author, repo) -> bool:
        """Return True if author/repo matches an ``author/repo`` entry ('*' is a wildcard)."""
        for entry in entries:
            want_author, _, want_repo = entry.partition('/')
            want_repo = want_repo or '*'  # a bare "author" entry covers all of their repos
            if want_author in ('*', author) and want_repo in ('*', repo):
                return True
        return False

    with gr.Blocks(title="GitHub Proxy", theme=gr.themes.Soft()) as blocks:
        gr.Markdown("""
            # 🚀 GitHub Proxy

            ### 使用方法
            1. 直接访问: `https://your-domain.com/github-url`
            2. 或者在下方输入GitHub URL进行测试

            ### 功能特点
            - ✨ 支持多种GitHub URL格式
            - 🔄 自动处理重定向
            - 💾 响应缓存
            - ⚡ CDN加速支持
            - 🛡️ 请求频率限制

            ### 支持的URL类型
            - GitHub Release/Archive
            - GitHub Raw/Blob
            - GitHub Gist
            - Raw GitHub Content
            """)

        with gr.Row():
            url_input = gr.Textbox(
                label="GitHub URL",
                placeholder="输入GitHub URL,例如:github.com/user/repo/blob/master/file.txt",
                scale=4
            )
            submit_btn = gr.Button("获取内容", scale=1)

        with gr.Row():
            with gr.Column():
                status = gr.Textbox(label="状态")
                headers = gr.JSON(label="响应头")
            with gr.Column():
                content = gr.Textbox(label="内容", max_lines=20)
                error = gr.Textbox(label="错误信息", visible=False)

        def handle_request(url: str, request: gr.Request):
            """Run one proxy request for the UI and map the result onto the output widgets."""
            result = proxy.proxy_request(url, request)
            error_msg = result.get("error", "")
            return {
                status: f"状态码: {result['status']}",
                headers: result["headers"],
                content: result["content"],
                # The error box is only shown when the request actually failed.
                error: gr.update(visible="error" in result, value=error_msg),
            }

        submit_btn.click(
            fn=handle_request,
            inputs=[url_input],
            outputs=[status, headers, content, error]
        )

        gr.Examples(
            examples=[
                ["github.com/microsoft/vscode/blob/main/README.md"],
                ["raw.githubusercontent.com/microsoft/vscode/main/README.md"],
                ["github.com/ollama/ollama/releases/download/v0.5.1/ollama-windows-amd64.zip"]
            ],
            inputs=url_input
        )

    blocks.queue()
    # A Blocks object is not an ASGI application, so it has to be mounted
    # through Gradio's helper rather than app.mount().
    app = gr.mount_gradio_app(api, blocks, path="/ui")

    # Declared with plain ``def`` so FastAPI runs it in a worker thread: the
    # upstream fetch uses the blocking ``requests`` library.
    @app.get("/{path:path}")
    def proxy_download(request: Request, path: str):
        """Proxy a GitHub URL given as the request path and stream the upstream body back."""
        if not path:
            return JSONResponse({"error": "无效的请求路径"}, status_code=400)

        # The path parameter excludes the query string, which git's smart-HTTP
        # discovery (info/refs?service=git-upload-pack) depends on.
        if request.url.query:
            path = f"{path}?{request.url.query}"

        if not path.startswith(('http://', 'https://')):
            path = 'https://' + path

        try:
            # First supported pattern that matches the URL, if any.
            match = next(
                (m for m in (re.match(p, path) for p in Config.PATTERNS.values()) if m),
                None,
            )
            if not match:
                return JSONResponse({"error": "不支持的URL格式"}, status_code=400)

            # Gist URLs have no repo component, so read the groups leniently.
            groups = match.groupdict()
            author, repo = groups.get('author'), groups.get('repo')

            if Config.WHITE_LIST and not _listed(Config.WHITE_LIST, author, repo):
                return JSONResponse({"error": "不在白名单中"}, status_code=403)
            if _listed(Config.BLACK_LIST, author, repo):
                return JSONResponse({"error": "在黑名单中"}, status_code=403)

            use_jsdelivr = Config.JSDELIVR or _listed(Config.PASS_LIST, author, repo)

            # Test for the "/blob/" path segment, not the bare substring, so
            # names that merely contain "blob" are not rewritten.
            is_blob = '/blob/' in path
            if use_jsdelivr and (is_blob or 'raw.githubusercontent.com' in path):
                if is_blob:
                    path = path.replace('/blob/', '@', 1).replace('github.com', 'cdn.jsdelivr.net/gh', 1)
                else:
                    path = re.sub(r'(\.com/.*?/.+?)/(.+?/)', r'\1@\2', path, count=1)
                    path = path.replace('raw.githubusercontent.com', 'cdn.jsdelivr.net/gh', 1)
                return RedirectResponse(path)

            if is_blob:
                path = path.replace('/blob/', '/raw/', 1)

            client_ip = request.client.host if request.client else "unknown"
            logging.info(f"Proxy request from {client_ip} to {path}")
            if not proxy.rate_limiter.is_allowed(client_ip):
                return JSONResponse({"error": "请求过于频繁,请稍后再试"}, status_code=429)

            # One streaming fetch per request. Client headers are deliberately
            # not copied onto the shared session: that would leak one caller's
            # Host/Cookie/Authorization headers into every later request, and
            # the session defaults already identify as git and accept any type.
            proxy_response = proxy.fetch_github_content(path, stream=True)
            if proxy_response.error:
                return JSONResponse({"error": proxy_response.error}, status_code=proxy_response.status)
            if proxy_response.redirect_url:
                return RedirectResponse(proxy_response.redirect_url)

            upstream = proxy_response.content

            # The headers arrive as a plain dict, so look names up case-insensitively.
            lowered = {k.lower(): v for k, v in proxy_response.headers.items()}
            try:
                content_length = int(lowered.get('content-length', 0))
            except (TypeError, ValueError):
                content_length = 0
            if content_length > Config.SIZE_LIMIT:
                upstream.close()
                return RedirectResponse(path)

            headers = {
                k: v for k, v in proxy_response.headers.items()
                if k.lower() not in unforwardable
            }

            # Content types for git smart HTTP. info/refs is checked first
            # because its query string also contains the service name.
            git_content_type = None
            if 'info/refs' in path:
                service = 'git-receive-pack' if 'git-receive-pack' in path else 'git-upload-pack'
                git_content_type = f'application/x-{service}-advertisement'
            elif 'git-upload-pack' in path:
                git_content_type = 'application/x-git-upload-pack-result'
            elif 'git-receive-pack' in path:
                git_content_type = 'application/x-git-receive-pack-result'
            if git_content_type:
                # Replace any upstream value regardless of its capitalisation.
                headers = {k: v for k, v in headers.items() if k.lower() != 'content-type'}
                headers['Content-Type'] = git_content_type

            def body():
                """Yield the upstream body in chunks and always release the connection."""
                try:
                    yield from upstream.iter_content(chunk_size=Config.CHUNK_SIZE)
                finally:
                    upstream.close()

            return StreamingResponse(
                body(),
                headers=headers,
                status_code=proxy_response.status
            )

        except Exception as e:
            logging.error(f"Proxy error: {str(e)}")
            return JSONResponse({"error": f"代理请求失败: {str(e)}"}, status_code=500)

    return app
|
|
|
if __name__ == "__main__":
    # Script entry point: build the application and serve it on all interfaces.
    # uvicorn is imported here so that merely importing this module does not need it.
    import uvicorn

    app = create_interface()
    uvicorn.run(app, port=7860, host="0.0.0.0")
|
|