File size: 14,721 Bytes
913b6ab
 
 
7e8a8d2
 
 
 
 
913b6ab
 
c3009b9
913b6ab
 
 
7e8a8d2
 
913b6ab
7e8a8d2
913b6ab
7e8a8d2
 
 
59098f2
7e8a8d2
913b6ab
7e8a8d2
 
913b6ab
 
 
 
 
 
 
 
 
 
 
 
7e8a8d2
76c89e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7e8a8d2
 
913b6ab
 
 
59098f2
913b6ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7e8a8d2
 
913b6ab
7e8a8d2
913b6ab
 
 
 
 
 
 
 
 
 
 
 
7e8a8d2
 
913b6ab
 
 
7e8a8d2
 
 
 
0b76791
913b6ab
7e8a8d2
59098f2
913b6ab
 
43a10de
913b6ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59098f2
913b6ab
76c89e5
 
913b6ab
 
 
 
 
 
 
 
 
 
 
59098f2
7e8a8d2
 
0af7a49
 
913b6ab
 
 
 
 
 
 
 
 
 
 
 
 
 
76c89e5
 
7e8a8d2
913b6ab
 
 
 
 
 
7e8a8d2
913b6ab
 
76c89e5
 
 
 
 
 
 
 
 
 
 
 
913b6ab
 
7e8a8d2
 
913b6ab
7e8a8d2
913b6ab
 
 
 
 
 
 
 
 
7e8a8d2
 
 
913b6ab
 
 
 
7e8a8d2
 
 
 
 
 
 
c76f23c
 
 
 
 
 
 
 
 
 
 
 
913b6ab
 
76c89e5
913b6ab
 
 
 
 
7e8a8d2
 
913b6ab
c76f23c
 
 
 
 
 
 
 
 
 
 
 
59098f2
913b6ab
 
 
 
 
 
 
 
 
 
76c89e5
913b6ab
7e8a8d2
76c89e5
913b6ab
 
 
 
 
 
 
 
 
 
c1df335
 
 
 
 
 
 
3a8851a
c1df335
 
 
 
 
 
 
 
 
 
913b6ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76c89e5
913b6ab
 
59098f2
913b6ab
 
 
7e8a8d2
 
 
 
 
 
304bfcc
 
 
 
 
 
 
 
7e8a8d2
304bfcc
913b6ab
 
304bfcc
913b6ab
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
from flask import Flask, request, jsonify, Response, stream_with_context, render_template_string
from google.generativeai.types import generation_types
from google.api_core.exceptions import InvalidArgument, ResourceExhausted, ServiceUnavailable, InternalServerError, Aborted
import google.generativeai as genai
import json
import os
import logging
import func
from datetime import datetime, timedelta
from apscheduler.schedulers.background import BackgroundScheduler
import time
import requests
from collections import deque


# Use Asia/Shanghai for local timestamps. The `time` module is already imported
# at this point, so the new TZ value has to be applied explicitly with tzset()
# (unavailable on Windows, hence the guard) before it can be relied upon.
os.environ['TZ'] = 'Asia/Shanghai'
if hasattr(time, 'tzset'):
    time.tzset()

app = Flask(__name__)

# A fresh random secret is generated on every process start.
app.secret_key = os.urandom(24)


# Module logger that prints the bare message (no level/timestamp prefix).
formatter = logging.Formatter('%(message)s')
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.addHandler(handler)

# Tunables read from the environment (defaults in parentheses).
MAX_RETRIES = int(os.environ.get('MaxRetries', 3))      # attempts per chat request (3)
MAX_REQUESTS = int(os.environ.get('MaxRequests', 2))    # requests allowed per key per window (2)
LIMIT_WINDOW = int(os.environ.get('LimitWindow', 60))   # rate-limit window in seconds (60)
RETRY_DELAY = 1        # base delay in seconds for the exponential backoff
MAX_RETRY_DELAY = 16   # upper bound in seconds for the backoff delay

# api_key -> deque of datetimes at which that key was used.
request_counts = {}

# Keys that are temporarily skipped, and how long (seconds) a key stays skipped.
api_key_blacklist = set()
api_key_blacklist_duration = 60

# Core advantage: every harm category listed here has its blocking threshold
# set to "BLOCK_NONE". The list is passed as `safety_settings` when a
# GenerativeModel is created for a chat request.
safety_settings = [
    {
        "category": "HARM_CATEGORY_HARASSMENT",
        "threshold": "BLOCK_NONE"
    },
    {
        "category": "HARM_CATEGORY_HATE_SPEECH",
        "threshold": "BLOCK_NONE"
    },
    {
        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "threshold": "BLOCK_NONE"
    },
    {
        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
        "threshold": "BLOCK_NONE"
    },
]

class APIKeyManager:
    """Round-robin pool of API keys read from the ``KeyArray`` environment variable.

    ``KeyArray`` is a comma-separated list of keys. Keys present in the
    module-level ``api_key_blacklist`` set are skipped by
    :meth:`get_available_key` until they are removed from that set again.
    """

    def __init__(self):
        # Tolerate a missing variable, stray whitespace and empty entries
        # (e.g. "k1, k2,") instead of crashing or handing out blank keys.
        raw_keys = os.environ.get('KeyArray', '')
        self.api_keys = [key.strip() for key in raw_keys.split(',') if key.strip()]
        self.current_index = 0

    def get_available_key(self):
        """Return the next non-blacklisted key in rotation, or None if there is none."""
        num_keys = len(self.api_keys)
        # Visit each key at most once, continuing from where the last call stopped.
        for _ in range(num_keys):
            if self.current_index >= num_keys:
                self.current_index = 0
            current_key = self.api_keys[self.current_index]
            self.current_index += 1

            if current_key not in api_key_blacklist:
                return current_key

        logger.error("所有API key都已耗尽或被暂时禁用,请重新配置或稍后重试")
        return None

    def show_all_keys(self):
        """Log the number of configured keys and the first 11 characters of each."""
        logger.info(f"当前可用API key个数: {len(self.api_keys)} ")
        for i, api_key in enumerate(self.api_keys):
            logger.info(f"API Key{i}: {api_key[:11]}...")

    def blacklist_key(self, key):
        """Skip ``key`` for ``api_key_blacklist_duration`` seconds, then re-enable it."""
        import threading

        logger.warning(f"{key[:11]} → 暂时禁用 {api_key_blacklist_duration} 秒")
        api_key_blacklist.add(key)

        # A stdlib timer is used rather than the `scheduler` global, because that
        # global only exists when this file is executed as __main__; under a
        # WSGI import the lookup would raise NameError. The timer is a daemon so
        # pending re-enables never keep the process alive at shutdown.
        timer = threading.Timer(
            api_key_blacklist_duration,
            api_key_blacklist.discard,
            args=(key,),
        )
        timer.daemon = True
        timer.start()

# Build the shared key pool at import time, log its keys, and select the
# initial key that requests will use.
key_manager = APIKeyManager()
key_manager.show_all_keys()
current_api_key = key_manager.get_available_key()

def switch_api_key():
    """Rotate the module-level ``current_api_key`` to the next usable key.

    When the key manager has nothing to offer, the current key is left
    unchanged and an error is logged instead.
    """
    global current_api_key
    replacement = key_manager.get_available_key()
    if not replacement:
        logger.error("API key 替换失败,所有API key都已耗尽或被暂时禁用,请重新配置或稍后重试")
        return
    current_api_key = replacement
    logger.info(f"API key 替换为 → {current_api_key[:11]}...")

# Log the first 11 characters of the key selected at startup.
logger.info(f"当前 API key: {current_api_key[:11]}...")

# Model ids advertised by the /hf/v1/models endpoint.
GEMINI_MODELS = [
    {"id": "gemini-1.5-pro-002"},
    {"id": "gemini-1.5-pro-latest"},
    {"id": "gemini-1.5-pro-exp-0827"},
    {"id": "learnlm-1.5-pro-experimental"},
    {"id": "gemini-exp-1114"},
    {"id": "gemini-exp-1121"},
    {"id": "gemini-exp-1206"},
    {"id": "gemini-2.0-flash-exp"},
    {"id": "gemini-2.0-flash-thinking-exp-1219"},
    {"id": "gemini-2.0-pro-exp"}
]

@app.route('/')
def index():
    """Serve the landing page.

    Renders an inline HTML template that shows the version banner and a link
    whose click handler copies ``<protocol>//<host>/hf/v1`` to the clipboard.
    """
    main_content = "Moonfanz Reminiproxy v2.1.2 2025-01-09"
    html_template = """

<!DOCTYPE html>

<html>

<head>

<meta charset="utf-8">

<script>

function copyToClipboard(text) {

  var textarea = document.createElement("textarea");

  textarea.textContent = text;

  textarea.style.position = "fixed";

  document.body.appendChild(textarea);

  textarea.select();

  try {

    return document.execCommand("copy");

  } catch (ex) {

    console.warn("Copy to clipboard failed.", ex);

    return false;

  } finally {

    document.body.removeChild(textarea);

  }

}

function copyLink(event) {

  event.preventDefault();

  const url = new URL(window.location.href);

  const link = url.protocol + '//' + url.host + '/hf/v1';

  copyToClipboard(link);

  alert('链接已复制: ' + link);

}

</script>

</head>

<body>

{{ main_content }}<br/><br/>完全开源、免费且禁止商用<br/><br/>点击复制反向代理: <a href="v1" onclick="copyLink(event)">Copy Link</a><br/>聊天来源选择"自定义(兼容 OpenAI)"<br/>将复制的网址填入到自定义端点<br/>将设置password填入自定义API秘钥<br/><br/><br/>

</body>

</html>

    """
    return render_template_string(html_template, main_content=main_content)

def is_within_rate_limit(api_key):
    """Check whether ``api_key`` may be used again inside the current window.

    Timestamps older than ``LIMIT_WINDOW`` seconds are dropped from the key's
    history first.

    Returns:
        ``(True, 0)`` when fewer than ``MAX_REQUESTS`` requests remain in the
        window, otherwise ``(False, seconds)`` where ``seconds`` is the time
        until the oldest remaining request leaves the window.
    """
    now = datetime.now()
    window = timedelta(seconds=LIMIT_WINDOW)
    history = request_counts.setdefault(api_key, deque())

    # Expire everything that fell out of the sliding window.
    cutoff = now - window
    while history and history[0] < cutoff:
        history.popleft()

    if len(history) < MAX_REQUESTS:
        return True, 0

    available_at = history[0] + window
    return False, (available_at - now).total_seconds()

def increment_request_count(api_key):
    """Append the current local time to the request history of ``api_key``."""
    timestamp = datetime.now()
    request_counts.setdefault(api_key, deque()).append(timestamp)

def handle_api_error(error, attempt, stream=False):
    """React to an exception raised by a Gemini call.

    Depending on the exception type this logs it, blacklists and/or rotates the
    current API key, and may sleep for a backoff delay before returning.

    Args:
        error: The exception that was caught.
        attempt: 1-based number of the attempt that failed.
        stream: Accepted for the caller's convenience; not used here.

    Returns:
        ``(False, response)`` where ``response`` is a JSON error response when
        ``attempt`` exceeds ``MAX_RETRIES`` and ``None`` otherwise.
    """
    if attempt > MAX_RETRIES:
        exhausted_message = f"{MAX_RETRIES} 次尝试后仍然失败,请修改预设或输入"
        logger.error(exhausted_message)
        return False, jsonify({
                'error': {
                    'message': exhausted_message,
                    'type': 'max_retries_exceeded'
                }
        })

    # The key was rejected: disable it for a while and move on to another one.
    if isinstance(error, InvalidArgument):
        logger.error(f"{current_api_key[:11]} → 无效,可能已过期或被删除")
        key_manager.blacklist_key(current_api_key)
        switch_api_key()
        return False, None

    # Quota/server-side failures: rotate the key and back off exponentially,
    # capped at MAX_RETRY_DELAY seconds.
    backoff_errors = (ResourceExhausted, Aborted, InternalServerError, ServiceUnavailable)
    if isinstance(error, backoff_errors):
        delay = min(RETRY_DELAY * (2 ** attempt), MAX_RETRY_DELAY)
        if isinstance(error, ResourceExhausted):
            logger.warning(f"{current_api_key[:11]} → 429 官方资源耗尽 → {delay} 秒后重试...")
        else:
            logger.warning(f"{current_api_key[:11]} → 未知错误↙ {delay} 秒后重试...\n{type(error).__name__}\n")
        key_manager.blacklist_key(current_api_key)
        switch_api_key()
        time.sleep(delay)
        return False, None

    # The model stopped the candidate: try again with another key, no blacklist.
    if isinstance(error, generation_types.StopCandidateException):
        logger.warning("AI输出内容被Gemini官方阻挡,代理没有得到有效回复")
        switch_api_key()
        return False, None

    logger.error(f"未知错误↙\n {error}")
    return False, None

@app.route('/hf/v1/chat/completions', methods=['POST'])
def chat_completions():
    """Handle a chat completion request by forwarding it to Gemini.

    Reads ``messages``, ``model`` (default ``'gemini-2.0-flash-exp'``),
    ``temperature`` (default 1), ``max_tokens`` (default 8192) and ``stream``
    (default False) from the JSON body, converts the messages with
    ``func.process_messages_for_gemini`` and calls the model, making up to
    ``MAX_RETRIES`` attempts.

    Returns:
        A ``text/event-stream`` response of ``chat.completion.chunk`` events
        when ``stream`` is true, otherwise a ``chat.completion`` JSON body.
        Failures are reported as JSON ``{'error': ...}`` with status 400 (body
        is not a JSON object, message conversion error, prompt blocked) or 500
        (all attempts failed, model response could not be read).
        Authentication failures use the response/status supplied by
        ``func.authenticate_request``, falling back to 401.
    """
    is_authenticated, auth_error, status_code = func.authenticate_request(request)
    if not is_authenticated:
        return auth_error if auth_error else jsonify({'error': '未授权'}), status_code if status_code else 401

    request_data = request.get_json()
    # A syntactically valid body that is not an object (null, list, string, ...)
    # would otherwise fail on .get() below and surface as an opaque 500.
    if not isinstance(request_data, dict):
        logger.error("请求体必须是 JSON 对象")
        return jsonify({
            'error': {
                'message': '请求体必须是 JSON 对象',
                'type': 'invalid_request_error'
            }
        }), 400

    messages = request_data.get('messages', [])
    model = request_data.get('model', 'gemini-2.0-flash-exp')
    temperature = request_data.get('temperature', 1)
    max_tokens = request_data.get('max_tokens', 8192)
    stream = request_data.get('stream', False)

    logger.info(f"\n{model} [r] → {current_api_key[:11]}...")

    gemini_history, user_message, error_response = func.process_messages_for_gemini(messages)

    if error_response:
        logger.error(f"处理输入消息时出错↙\n {error_response}")
        return jsonify(error_response), 400

    def do_request(api_key, attempt):
        """Run one attempt with ``api_key``; return ``(success, response)``."""
        within_limit, wait_seconds = is_within_rate_limit(api_key)
        if not within_limit:
            logger.warning(f"{api_key[:11]} → 暂时超过限额,该API key将在 {wait_seconds} 秒后启用...")
            switch_api_key()
            return False, None

        increment_request_count(api_key)

        genai.configure(api_key=api_key)

        generation_config = {
            "temperature": temperature,
            "max_output_tokens": max_tokens
        }

        gen_model = genai.GenerativeModel(
            model_name=model,
            generation_config=generation_config,
            safety_settings=safety_settings
        )

        try:
            # With prior turns, continue a chat session; otherwise send a
            # single standalone prompt.
            if gemini_history:
                chat_session = gen_model.start_chat(history=gemini_history)
                response = chat_session.send_message(user_message, stream=stream)
            else:
                response = gen_model.generate_content(user_message, stream=stream)
            return True, response
        except Exception as e:
            # handle_api_error logs the failure and rotates/blacklists keys.
            return handle_api_error(e, attempt, stream)

    def sse_event(payload):
        """Serialize ``payload`` as one server-sent ``data:`` event."""
        return f"data: {json.dumps(payload)}\n\n"

    def stop_chunk():
        """Build the closing chunk that carries ``finish_reason: 'stop'``."""
        return {
            'choices': [
                {
                    'delta': {},
                    'finish_reason': 'stop',
                    'index': 0
                }
            ],
            'object': 'chat.completion.chunk'
        }

    def generate(response):
        """Yield one SSE event per non-empty text chunk, then a stop chunk.

        If iterating the model response fails, an error event is emitted
        followed by the stop chunk so the client still sees a terminated stream.
        """
        try:
            for chunk in response:
                if chunk.text:
                    yield sse_event({
                        'choices': [
                            {
                                'delta': {
                                    'content': chunk.text
                                },
                                'finish_reason': None,
                                'index': 0
                            }
                        ],
                        'object': 'chat.completion.chunk'
                    })

            yield sse_event(stop_chunk())
            logger.info("200!")

        except Exception:
            logger.error("流式输出时截断,请关闭流式输出或修改你的输入")
            yield sse_event({
                'error': {
                    'message': '流式输出时截断,请关闭流式输出或修改你的输入',
                    'type': 'internal_server_error'
                }
            })
            yield sse_event(stop_chunk())

    attempt = 0
    success = False
    response = None

    # `current_api_key` is re-read from the module global on every iteration so
    # that a key rotation triggered by the previous attempt takes effect.
    while attempt < MAX_RETRIES and not success:
        attempt += 1
        logger.info(f"第 {attempt}/{MAX_RETRIES} 次尝试 ...")
        success, response = do_request(current_api_key, attempt)

    if not success:
        logger.error(f"{MAX_RETRIES} 次尝试均失败,请调整配置或向Moonfanz反馈")
        return jsonify({
            'error': {
                'message': f'{MAX_RETRIES} 次尝试均失败,请调整配置或向Moonfanz反馈',
                'type': 'internal_server_error'
            }
        }), 500

    if stream:
        return Response(stream_with_context(generate(response)), mimetype='text/event-stream')

    try:
        text_content = response.text
    except (AttributeError, IndexError, TypeError, ValueError) as e:
        if "response.candidates is empty" in str(e):
            logger.error("你的输入被AI安全过滤器阻止")
            return jsonify({
                'error': {
                    'message': '你的输入被AI安全过滤器阻止',
                    'type': 'prompt_blocked_error',
                    'details': str(e)
                }
            }), 400
        logger.error("AI响应处理失败")
        return jsonify({
            'error': {
                'message': 'AI响应处理失败',
                'type': 'response_processing_error'
            }
        }), 500

    response_data = {
        'id': 'chatcmpl-xxxxxxxxxxxx',
        'object': 'chat.completion',
        'created': int(datetime.now().timestamp()),
        'model': model,
        'choices': [{
            'index': 0,
            'message': {
                'role': 'assistant',
                'content': text_content
            },
            'finish_reason': 'stop'
        }],
        # Token usage is not computed; zeros are reported as placeholders.
        'usage': {
            'prompt_tokens': 0,
            'completion_tokens': 0,
            'total_tokens': 0
        }
    }
    logger.info("200!")
    return jsonify(response_data)

@app.route('/hf/v1/models', methods=['GET'])
def list_models():
    """Return ``GEMINI_MODELS`` wrapped as ``{"object": "list", "data": [...]}``."""
    return jsonify({"object": "list", "data": GEMINI_MODELS})

def keep_alive():
    """Send a GET to this server's own index route and print the outcome.

    The port is taken from the ``PORT`` environment variable (default 7860),
    the same source the server uses when it starts, so the ping still reaches
    the app when a non-default port is configured.
    """
    port = int(os.environ.get('PORT', 7860))
    try:
        response = requests.get(f"http://127.0.0.1:{port}/", timeout=10)
        response.raise_for_status()
        print(f"Keep alive ping successful: {response.status_code} at {time.ctime()}")
    except requests.exceptions.RequestException as e:
        # Best-effort ping: report the failure and let the next run try again.
        print(f"Keep alive ping failed: {e} at {time.ctime()}")

if __name__ == '__main__':
    # Background scheduler used for the periodic keep-alive ping.
    scheduler = BackgroundScheduler()

    scheduler.add_job(keep_alive, 'interval', hours=12)
    scheduler.start()

    logger.info(f"最大尝试次数/MaxRetries: {MAX_RETRIES}")
    logger.info(f"最大请求次数/MaxRequests: {MAX_REQUESTS}")
    logger.info(f"请求限额窗口/LimitWindow: {LIMIT_WINDOW} 秒")

    # The server listens on every interface, so the interactive debugger (which
    # can execute arbitrary code from the browser) must not be enabled by
    # default. Set FLASK_DEBUG=1 to opt in during local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')

    app.run(debug=debug_enabled, host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))