Server Deployment Knowledge Base (Docker, Nginx)
I. Introduction
This chapter covers deploying the project to a server with a public IP address, as hands-on practice operating Linux, Docker, and Nginx in a live environment.
Before starting, we fix an issue left over from the previous chapter by adding conversational context memory for the model.
Modify the startEventStream method in docs/dev-ops/nginx/html/js/index.js:
function startEventStream(message) {
if (isStreaming) return;
if (!currentChatId) {
console.error("Cannot start stream without a current chat ID.");
return;
}
setStreamingState(true);
if (currentEventSource) {
currentEventSource.close();
}
const selectedRagTag = ragSelect.value; // Keep RAG selection logic
const selectedAiModelValue = aiModelSelect.value;
const selectedAiModelName = aiModelSelect.options[aiModelSelect.selectedIndex].getAttribute('model');
if (!selectedAiModelName) {
console.error("No AI model name selected!");
setStreamingState(false);
appendMessage("错误:未选择有效的 AI 模型。", true, false);
return;
}
// --- WORKAROUND START ---
// 1. Get chat history
const chatData = getChatData(currentChatId);
const history = chatData ? chatData.messages : [];
// 2. Format history into a single string to prepend
let historyString = "";
// Limit history length to avoid excessively long URLs (adjust maxHistory as needed)
const maxHistory = 10; // Example: Keep last 10 messages
const startIndex = Math.max(0, history.length - maxHistory);
for (let i = startIndex; i < history.length; i++) {
const msg = history[i];
if (typeof msg.content === 'string' && typeof msg.isAssistant === 'boolean') {
// Important: Exclude <think> tags from the history string sent to the model
// unless your model is specifically trained to handle them as part of the prompt.
// Usually, you only want the actual conversation turns.
const contentWithoutThink = msg.content.replace(/<think>.*?<\/think>/gs, '').trim();
if (contentWithoutThink) { // Only add if there's actual content after removing think tags
historyString += (msg.isAssistant ? "Assistant: " : "User: ") + contentWithoutThink + "\n";
}
}
}
// 3. Prepend history to the current message
const combinedMessage = historyString + "User: " + message; // Clearly mark the new message
// --- WORKAROUND END ---
let url;
const base = `http://localhost:7080/api/v1/${selectedAiModelValue}`;
// --- Send the COMBINED message in the 'message' parameter ---
const params = new URLSearchParams({
// Send the combined history+current message string
message: combinedMessage,
model: selectedAiModelName
// NO 'history' parameter here
});
if (selectedRagTag) {
params.append('ragTag', selectedRagTag);
// Decide how RAG interacts with history prepending.
// Does the backend RAG process need the raw message or the combined one?
// Assuming backend handles RAG based on the full 'message' param for now.
url = `${base}/generate_stream_rag?${params.toString()}`;
} else {
url = `${base}/generate_stream?${params.toString()}`;
}
// --- END MODIFICATION ---
console.log("Streaming URL (Workaround):", url); // URL will have a long 'message' param
console.log("Combined Message sent:", combinedMessage); // Log the combined string
currentEventSource = new EventSource(url);
let accumulatedContent = '';
let tempMessageWrapper = null;
let streamEnded = false;
const messageId = `ai-message-${Date.now()}`;
// --- Create Unified Placeholder ---
if (chatArea.style.display === 'none') {
chatArea.style.display = 'block';
welcomeMessage.style.display = 'none';
}
tempMessageWrapper = document.createElement('div');
tempMessageWrapper.className = 'flex w-full mb-4 message-bubble justify-start';
tempMessageWrapper.id = messageId;
tempMessageWrapper.innerHTML = `
<div class="flex gap-3 max-w-4xl w-full">
<div class="w-8 h-8 rounded-full bg-green-100 flex items-center justify-center flex-shrink-0 mt-1 shadow-sm">
<span class="text-green-600 text-sm font-semibold">AI</span>
</div>
<div class="message-content-wrapper bg-white border border-gray-200 px-4 py-3 rounded-lg shadow-sm min-w-[80px] flex-grow markdown-body">
<span class="streaming-cursor animate-pulse">▋</span>
</div>
</div>
`;
chatArea.appendChild(tempMessageWrapper);
let messageContentWrapper = tempMessageWrapper.querySelector('.message-content-wrapper');
let hasInjectedDetails = false;
requestAnimationFrame(() => {
chatArea.scrollTop = chatArea.scrollHeight;
});
// The rest of the onmessage, onerror, DOM handling logic remains the same
// as in the previous snippet. The AI's response (accumulatedContent)
// is still processed and displayed identically.
currentEventSource.onmessage = function (event) {
// ... (SAME onmessage logic as before - handling stream data, <think> tags, DOM updates) ...
if (streamEnded) return;
try {
const data = JSON.parse(event.data);
if (data.result?.output?.text !== undefined) {
const newContent = data.result.output.text ?? '';
accumulatedContent += newContent;
// --- Process Accumulated Content ---
let thinkingSteps = '';
let match;
const localThinkRegex = /<think>(.*?)<\/think>/gs;
while ((match = localThinkRegex.exec(accumulatedContent)) !== null) {
thinkingSteps += match[1] + '\n';
}
thinkingSteps = thinkingSteps.trim();
const finalAnswer = accumulatedContent.replace(/<think>.*?<\/think>/gs, '').trim();
// --- Update DOM Dynamically ---
messageContentWrapper = document.getElementById(messageId)?.querySelector('.message-content-wrapper');
if (!messageContentWrapper) {
console.error("Message wrapper not found!");
return;
}
if (thinkingSteps && !hasInjectedDetails) {
messageContentWrapper.innerHTML = `
<details class="thinking-process" open>
<summary class="cursor-pointer text-sm text-gray-600 hover:text-gray-800 mb-2 focus:outline-none select-none">
思考过程... <span class="text-xs opacity-70">(点击展开/折叠)</span>
</summary>
<div class="thinking-steps-content markdown-body border-t border-gray-100 pt-2 pl-2 text-xs opacity-80 min-h-[20px]">
</div>
</details>
<div class="final-answer markdown-body pt-3">
</div>
`;
hasInjectedDetails = true;
}
// --- Populate Content ---
if (hasInjectedDetails) {
const thinkingStepsDiv = messageContentWrapper.querySelector('.thinking-steps-content');
const finalAnswerDiv = messageContentWrapper.querySelector('.final-answer');
if (thinkingStepsDiv) {
thinkingStepsDiv.innerHTML = sanitizeHTML(marked.parse(thinkingSteps + '<span class="streaming-cursor animate-pulse">▋</span>'));
applyHighlightingAndCopyButtons(thinkingStepsDiv);
}
if (finalAnswerDiv) {
finalAnswerDiv.innerHTML = finalAnswer
? sanitizeHTML(marked.parse(finalAnswer))
: '<span class="text-gray-400 text-sm">正在处理...</span>';
applyHighlightingAndCopyButtons(finalAnswerDiv);
}
} else {
messageContentWrapper.innerHTML = sanitizeHTML(marked.parse(finalAnswer + '<span class="streaming-cursor animate-pulse">▋</span>'));
applyHighlightingAndCopyButtons(messageContentWrapper);
}
requestAnimationFrame(() => {
chatArea.scrollTop = chatArea.scrollHeight;
});
}
// --- Handle Stream End ---
if (data.result?.metadata?.finishReason === 'stop' || data.result?.metadata?.finishReason === 'STOP') {
streamEnded = true;
currentEventSource.close();
// --- Final Processing of accumulatedContent ---
let finalThinkingSteps = '';
let match;
const localThinkRegex = /<think>(.*?)<\/think>/gs; // fresh regex instance so lastIndex starts at 0
while ((match = localThinkRegex.exec(accumulatedContent)) !== null) {
finalThinkingSteps += match[1] + '\n';
}
finalThinkingSteps = finalThinkingSteps.trim();
const finalFinalAnswer = accumulatedContent.replace(/<think>.*?<\/think>/gs, '').trim();
// --- Final DOM Update ---
messageContentWrapper = document.getElementById(messageId)?.querySelector('.message-content-wrapper');
if (!messageContentWrapper) {
console.error("Message wrapper not found for final update!");
return;
}
messageContentWrapper.querySelectorAll('.streaming-cursor').forEach(c => c.remove());
if (finalThinkingSteps) {
if (!hasInjectedDetails) {
messageContentWrapper.innerHTML = `
<details class="thinking-process" open>
<summary>思考过程 <span class="text-xs opacity-70">(来自历史记录)</span></summary>
<div class="thinking-steps-content markdown-body border-t border-gray-100 pt-2 pl-2 text-xs opacity-80"></div>
</details>
<div class="final-answer markdown-body pt-3"></div>
`;
hasInjectedDetails = true;
}
const thinkingStepsDiv = messageContentWrapper.querySelector('.thinking-steps-content');
const finalAnswerDiv = messageContentWrapper.querySelector('.final-answer');
if (thinkingStepsDiv) {
thinkingStepsDiv.innerHTML = sanitizeHTML(marked.parse(finalThinkingSteps));
applyHighlightingAndCopyButtons(thinkingStepsDiv);
} else {
console.error("Thinking steps div not found in final update!");
}
if (finalAnswerDiv) {
finalAnswerDiv.innerHTML = finalFinalAnswer ? sanitizeHTML(marked.parse(finalFinalAnswer)) : '';
applyHighlightingAndCopyButtons(finalAnswerDiv);
} else {
console.error("Final answer div not found in final update!");
}
// Update summary text after completion
const summaryElement = messageContentWrapper.querySelector('.thinking-process summary');
if (summaryElement) summaryElement.innerHTML = `思考过程 <span class="text-xs opacity-70">(点击展开/折叠)</span>`;
} else {
messageContentWrapper.innerHTML = finalFinalAnswer ? sanitizeHTML(marked.parse(finalFinalAnswer)) : '';
if (!hasInjectedDetails) {
messageContentWrapper.classList.add('markdown-body');
}
applyHighlightingAndCopyButtons(messageContentWrapper);
}
// --- Save the complete message (including <think> tags) ---
// IMPORTANT: Even with the workaround, save the ORIGINAL AI response
// (accumulatedContent) to localStorage, *including* any <think> tags,
// so the history display remains accurate. Don't save the combinedMessage.
if (currentChatId && accumulatedContent.trim()) {
const chatData = getChatData(currentChatId);
if (chatData) {
chatData.messages.push({content: accumulatedContent, isAssistant: true});
localStorage.setItem(`chat_${currentChatId}`, JSON.stringify(chatData));
updateChatList();
}
}
currentEventSource = null;
setStreamingState(false);
messageInput.focus();
}
} catch (e) {
console.error('Error processing stream event:', e, event.data);
}
};
currentEventSource.onerror = function (error) {
// ... (SAME onerror logic as before) ...
console.error('EventSource encountered an error:', error);
streamEnded = true;
if (currentEventSource) {
currentEventSource.close();
}
const errorText = '--- 抱歉,连接中断或发生错误 ---';
messageContentWrapper = document.getElementById(messageId)?.querySelector('.message-content-wrapper');
if (messageContentWrapper) {
messageContentWrapper.querySelectorAll('.streaming-cursor').forEach(c => c.remove());
const errorP = document.createElement('p');
errorP.className = 'text-red-500 text-sm font-semibold mt-2 border-t pt-2';
errorP.textContent = errorText;
const finalAnswerDiv = messageContentWrapper.querySelector('.final-answer');
if (finalAnswerDiv) {
if (finalAnswerDiv.textContent.includes("正在处理")) finalAnswerDiv.innerHTML = '';
finalAnswerDiv.appendChild(errorP);
} else {
if (messageContentWrapper.textContent === '▋') messageContentWrapper.innerHTML = '';
messageContentWrapper.appendChild(errorP);
}
// Update summary text in case of error during thinking display
const summaryElement = messageContentWrapper.querySelector('.thinking-process summary');
if (summaryElement && summaryElement.textContent.includes("思考过程...")) {
summaryElement.innerHTML = `思考过程 <span class="text-xs opacity-70">(已中断)</span>`;
}
} else {
appendMessage(errorText, true, false);
}
currentEventSource = null;
setStreamingState(false);
messageInput.focus();
};
}
On the backend, rewrite the generate_stream method to match the frontend call and implement context memory.
/**
* http://localhost:7080/api/v1/openai/generate_stream?model=deepseek-r1:1.5b&message=你是?
*/
@GetMapping("generate_stream")
@Override
public Flux<ChatResponse> generateStream(
        @RequestParam("model") String model,
        @RequestParam("message") String currentMessage, // renamed for clarity
        @RequestParam(name = "history", required = false, defaultValue = "[]") String history // receives the chat history as a JSON string
) {
    List<Message> conversation = new ArrayList<>(); // holds the complete conversation
    // Try to parse the history JSON
    try {
        // Parse the JSON string into a list of maps, one map per message
        List<Map<String, String>> historyList = objectMapper.readValue(history, new TypeReference<List<Map<String, String>>>() {});
        for (Map<String, String> msg : historyList) {
            // Create the matching Message object based on 'role'
            if ("user".equalsIgnoreCase(msg.get("role"))) {
                conversation.add(new UserMessage(msg.get("content")));
            } else if ("assistant".equalsIgnoreCase(msg.get("role"))) {
                // If the saved history contains <think> tags, include them as well
                conversation.add(new AssistantMessage(msg.get("content")));
            }
        }
    } catch (Exception e) {
        // Log the error or handle the invalid history JSON - consider returning an error Flux?
        System.err.println("解析聊天历史 JSON 时出错: " + e.getMessage());
        // For simplicity, continue without history if parsing fails
    }
    // Append the user's current message
    conversation.add(new UserMessage(currentMessage));
    // Build the Prompt from the full conversation history
    Prompt prompt = new Prompt(
            conversation, // the List<Message> containing every turn
            OllamaOptions.builder()
                    .model(model)
                    .build()
    );
    // Send the Prompt containing the full conversation to the model
    return openAiChatModel.stream(prompt);
}
Apply the same change to the corresponding method in OllamaController.
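For reference, both calling styles can be exercised from the command line. This is a sketch assuming the service is reachable at localhost:7080 as in the snippets above; the model name and messages are placeholders:

```bash
# Style used by the frontend workaround: prior turns are prepended
# inside the 'message' parameter itself
curl -G "http://localhost:7080/api/v1/openai/generate_stream" \
  --data-urlencode "model=deepseek-r1:1.5b" \
  --data-urlencode "message=User: 你好
Assistant: 你好!
User: 我刚才说了什么?"

# Style the backend also supports: prior turns passed as a JSON array
# in the optional 'history' parameter
curl -G "http://localhost:7080/api/v1/openai/generate_stream" \
  --data-urlencode "model=deepseek-r1:1.5b" \
  --data-urlencode "message=我刚才说了什么?" \
  --data-urlencode 'history=[{"role":"user","content":"你好"},{"role":"assistant","content":"你好!"}]'
```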
II. Deployment Process
The diagram below shows the overall deployment flow:

From local development to cloud deployment, the flow is: develop → build the Docker image → push it to Docker Hub → pull the image on the cloud server → run the Spring Boot application together with its supporting services.
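In command form, the pipeline roughly corresponds to the following steps, using the script and image names that appear later in this chapter; the compose step is an assumption based on the scripts used below:

```bash
# 1. Build the image locally (script from the project repo)
./lxf-rag-app/build.sh

# 2. Push the image to Docker Hub
docker push cactuslixf/ai-rag-knowledge-app:1.2

# 3. On the cloud server: pull the image and start the services
docker pull cactuslixf/ai-rag-knowledge-app:1.2
docker compose up -d
```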
III. Project Deployment
1. Project Structure

2. Building the Image
With Docker Desktop installed locally, simply run the lxf-rag-app/build.sh script.
Because Docker runs on the cloud server, you need to enable the Docker Engine API in order to operate Docker remotely.
Query the docker.service configuration path (this should print FragmentPath=/lib/systemd/system/docker.service, the file edited next):
systemctl show -p FragmentPath docker.service
Edit docker.service to enable the Docker Engine API:
vim /lib/systemd/system/docker.service
[Unit]
Description=Docker Application Container Engine
Documentation=https://docs.docker.com
After=network-online.target docker.socket firewalld.service containerd.service time-set.target
Wants=network-online.target containerd.service
Requires=docker.socket
[Service]
Type=notify
# the default is not to use systemd for cgroups because the delegate issues still
# exists and systemd currently does not support the cgroup feature set required
# for containers run by docker
# Comment out the original ExecStart line and add -H tcp://0.0.0.0:4610 -H unix:///var/run/docker.sock
#ExecStart=/usr/bin/dockerd -H fd:// --containerd=/run/containerd/containerd.sock
ExecStart=/usr/bin/dockerd -H tcp://0.0.0.0:4610 -H unix:///var/run/docker.sock
ExecReload=/bin/kill -s HUP $MAINPID
TimeoutStartSec=0
RestartSec=2
Restart=always
# Note that StartLimit* options were moved from "Service" to "Unit" in systemd 229.
# Both the old, and new location are accepted by systemd 229 and up, so using the old location
# to make them work for either version of systemd.
StartLimitBurst=3
# Note that StartLimitInterval was renamed to StartLimitIntervalSec in systemd 230.
# Both the old, and new name are accepted by systemd 230 and up, so using the old name to make
# this option work for either version of systemd.
StartLimitInterval=60s
# Having non-zero Limit*s causes performance problems due to accounting overhead
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNOFILE=infinity
LimitNPROC=infinity
LimitCORE=infinity
# Comment TasksMax if your systemd version does not support it.
# Only systemd 226 and above support this option.
TasksMax=infinity
# set delegate yes so that systemd does not reset the cgroups of docker containers
Delegate=yes
# kill only the docker process, not all processes in the cgroup
KillMode=process
OOMScoreAdjust=-500
[Install]
WantedBy=multi-user.target
Restart Docker:
sudo systemctl daemon-reload
sudo systemctl restart docker
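To verify the Engine API is reachable, a quick check; port 4610 matches the ExecStart line above, and <server-ip> is a placeholder for your server's address:

```bash
# Returns Docker version info as JSON if the Engine API is listening
curl http://<server-ip>:4610/version

# Or point a local Docker CLI at the remote engine
docker -H tcp://<server-ip>:4610 info
```

Note that an Engine API exposed on 0.0.0.0 without TLS effectively grants root access to anyone who can reach the port, so restrict it with a firewall or enable TLS outside of testing.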
Install the Docker plugin:

In IDEA, add the Docker Engine API connection URL:

The screenshot above shows the error com.intellij.execution.process.ProcessNotCreatedException: Cannot run program "docker": CreateProcess error=2, the system cannot find the specified file. This happens because newer versions of IDEA require a local Docker CLI to be configured.
Download the Docker CLI
- Visit Docker's official static binary download page: https://download.docker.com/win/static/stable/x86_64/.
Add the Docker CLI to the system PATH
- Move docker.exe to a fixed directory, e.g. C:\docker.
- Add that directory to the Path system environment variable:
- Right-click "This PC" → "Properties" → "Advanced system settings" → "Environment Variables".
- Under "System variables", select Path, click "Edit", and add C:\docker.
- Open a command prompt and run docker --version to confirm the installation.
Configure IntelliJ IDEA
Select the configured docker.exe:
If the following error appears when building the image, you need to download and configure the buildx plugin.

Download: https://github.com/docker/buildx/releases/tag/v0.23.0
In your user folder (or type %USERPROFILE% in the File Explorer address bar to locate it), create a .docker folder (note the leading dot), and inside it a cli-plugins folder. Rename the downloaded file to docker-buildx.exe and place it in that directory.

Once all errors are resolved, run the build again; output like the following indicates the image was built successfully:

3. Operation Scripts

- Create your image repository at https://hub.docker.com/repository/create. (A proxy may be required to open this page.)
- Open-source Linux proxy tool: https://github.com/nelvko/clash-for-linux-install
4. Pushing the Image
Push the image to Docker Hub from the command line (you can also upload it from the local Docker Desktop app):
$ docker login -u cactuslixf
$ docker push cactuslixf/ai-rag-knowledge-app:1.2
The screenshot below shows a successful upload:

Note that the push only succeeds with a proxy enabled! Alternatively, you can use an overseas VPS to verify the steps (proxy tool linked above).
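If the push is rejected because the local image is not yet tagged for your repository, tag it first. This is a sketch assuming build.sh produced a local image named ai-rag-knowledge-app:1.2; adjust to the actual local name:

```bash
# Tag the locally built image with the Docker Hub repository name, then push
docker tag ai-rag-knowledge-app:1.2 cactuslixf/ai-rag-knowledge-app:1.2
docker push cactuslixf/ai-rag-knowledge-app:1.2
```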
5. Uploading the Scripts

Upload all the files to the target directory on the server.
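One way to upload from a local terminal; the user, server address, and target directory below are placeholders:

```bash
# Copy the deployment files to the server (adjust paths to your layout)
scp -r docs/dev-ops root@<server-ip>:/opt/ai-rag-knowledge
```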
6. Running the Scripts

7. Accessing the Service

With that, you have successfully set up your own private local knowledge base!