[{"data":1,"prerenderedAt":1597},["ShallowReactive",2],{"navigation":3,"\u002Fintro\u002Fllm-probability":189,"\u002Fintro\u002Fllm-probability-surround":1593},[4,35,57,75,101,123,149,171],{"title":5,"icon":6,"path":7,"stem":8,"children":9,"page":34},"第 1 章：认识 Claude Code","i-lucide-rocket","\u002Fintro","1.intro",[10,14,18,22,26,30],{"title":11,"path":12,"stem":13},"什么是 Claude Code","\u002Fintro\u002Fwhat-is-claude-code","1.intro\u002F1.what-is-claude-code",{"title":15,"path":16,"stem":17},"Claude Code 与 Copilot、Cursor、Windsurf 的本质区别","\u002Fintro\u002Fvs-competitors","1.intro\u002F2.vs-competitors",{"title":19,"path":20,"stem":21},"AI 编程助手生态全景与选型指南","\u002Fintro\u002Fecosystem-guide","1.intro\u002F3.ecosystem-guide",{"title":23,"path":24,"stem":25},"LLM 的概率本质","\u002Fintro\u002Fllm-probability","1.intro\u002F4.llm-probability",{"title":27,"path":28,"stem":29},"从聊天机器人到 Agent","\u002Fintro\u002Ffrom-chatbot-to-agent","1.intro\u002F5.from-chatbot-to-agent",{"title":31,"path":32,"stem":33},"Claude Code 的 Agentic Loop 全拆解","\u002Fintro\u002Fagentic-loop","1.intro\u002F6.agentic-loop",false,{"title":36,"icon":37,"path":38,"stem":39,"children":40,"page":34},"第 2 章：安装与配置","i-lucide-settings","\u002Fsetup","2.setup",[41,45,49,53],{"title":42,"path":43,"stem":44},"系统要求与安装方式","\u002Fsetup\u002Fsystem-requirements","2.setup\u002F1.system-requirements",{"title":46,"path":47,"stem":48},"认证、登录与多账户管理","\u002Fsetup\u002Fauthentication","2.setup\u002F2.authentication",{"title":50,"path":51,"stem":52},"选择你的界面","\u002Fsetup\u002Fchoose-interface","2.setup\u002F3.choose-interface",{"title":54,"path":55,"stem":56},"Coding Plan","\u002Fsetup\u002Fcoding-plan","2.setup\u002F4.coding-plan",{"title":58,"icon":59,"path":60,"stem":61,"children":62,"page":34},"第 3 章：快速上手","i-lucide-hand","\u002Fquickstart","3.quickstart",[63,67,71],{"title":64,"path":65,"stem":66},"启动、交互模式与基本命令","\u002Fquickstart\u002Fstartup","3.quickstart\u002F1.startup",{"title":68,"path":69,"stem":70},"让 Claude 
理解你的项目","\u002Fquickstart\u002Fcodebase-understanding","3.quickstart\u002F2.codebase-understanding",{"title":72,"path":73,"stem":74},"第一次代码变更","\u002Fquickstart\u002Ffirst-change","3.quickstart\u002F3.first-change",{"title":76,"icon":77,"path":78,"stem":79,"children":80,"page":34},"第 4 章：核心功能","i-lucide-laptop","\u002Fcore-features","4.core-features",[81,85,89,93,97],{"title":82,"path":83,"stem":84},"代码库全景扫描与模块关系分析","\u002Fcore-features\u002Fcodebase-scan","4.core-features\u002F1.codebase-scan",{"title":86,"path":87,"stem":88},"代码编辑与生成","\u002Fcore-features\u002Fedit-generate","4.core-features\u002F2.edit-generate",{"title":90,"path":91,"stem":92},"测试与调试","\u002Fcore-features\u002Ftest-debug","4.core-features\u002F3.test-debug",{"title":94,"path":95,"stem":96},"Git 工作流","\u002Fcore-features\u002Fgit-workflow","4.core-features\u002F4.git-workflow",{"title":98,"path":99,"stem":100},"工具链执行","\u002Fcore-features\u002Ftoolchain","4.core-features\u002F5.toolchain",{"title":102,"icon":103,"path":104,"stem":105,"children":106,"page":34},"第 5 章：进阶配置","i-lucide-wrench","\u002Fadvanced","5.advanced",[107,111,115,119],{"title":108,"path":109,"stem":110},"CLAUDE.md","\u002Fadvanced\u002Fclaude-md","5.advanced\u002F1.claude-md",{"title":112,"path":113,"stem":114},"Skills","\u002Fadvanced\u002Fskills","5.advanced\u002F2.skills",{"title":116,"path":117,"stem":118},"MCP","\u002Fadvanced\u002Fmcp","5.advanced\u002F3.mcp",{"title":120,"path":121,"stem":122},"Hooks 与 Plan 模式","\u002Fadvanced\u002Fhooks-plan","5.advanced\u002F4.hooks-plan",{"title":124,"icon":125,"path":126,"stem":127,"children":128,"page":34},"第 6 
章：实战开发","i-lucide-hammer","\u002Fpractice","6.practice",[129,133,137,141,145],{"title":130,"path":131,"stem":132},"需求分析与架构设计","\u002Fpractice\u002Frequirements-architecture","6.practice\u002F1.requirements-architecture",{"title":134,"path":135,"stem":136},"项目脚手架搭建与技术选型","\u002Fpractice\u002Fscaffolding","6.practice\u002F2.scaffolding",{"title":138,"path":139,"stem":140},"核心功能实现","\u002Fpractice\u002Fcore-features","6.practice\u002F3.core-features",{"title":142,"path":143,"stem":144},"测试覆盖、代码审查与质量调优","\u002Fpractice\u002Ftesting-quality","6.practice\u002F4.testing-quality",{"title":146,"path":147,"stem":148},"部署上线与成果分享","\u002Fpractice\u002Fdeployment","6.practice\u002F5.deployment",{"title":150,"icon":151,"path":152,"stem":153,"children":154,"page":34},"第 7 章：心法层","i-lucide-brain","\u002Fmindset","7.mindset",[155,159,163,167],{"title":156,"path":157,"stem":158},"提示词设计原则","\u002Fmindset\u002Fprompt-design","7.mindset\u002F1.prompt-design",{"title":160,"path":161,"stem":162},"上下文管理策略","\u002Fmindset\u002Fcontext-management","7.mindset\u002F2.context-management",{"title":164,"path":165,"stem":166},"安全与权限控制","\u002Fmindset\u002Fsecurity","7.mindset\u002F3.security",{"title":168,"path":169,"stem":170},"Boris Cherny 的 9 条实战心法与团队推广经验","\u002Fmindset\u002Fboris-cherny-tips","7.mindset\u002F4.boris-cherny-tips",{"title":172,"icon":173,"path":174,"stem":175,"children":176,"page":34},"附录","i-lucide-paperclip","\u002Fappendix","8.appendix",[177,181,185],{"title":178,"path":179,"stem":180},"常用命令速查表","\u002Fappendix\u002Fa.command-cheatsheet","8.appendix\u002Fa.command-cheatsheet",{"title":182,"path":183,"stem":184},"AI 
核心术语汇编","\u002Fappendix\u002Fb.ai-terminology","8.appendix\u002Fb.ai-terminology",{"title":186,"path":187,"stem":188},"资源链接与延伸阅读","\u002Fappendix\u002Fc.resources","8.appendix\u002Fc.resources",{"id":190,"title":23,"body":191,"description":1585,"extension":1586,"links":1587,"meta":1588,"navigation":1590,"path":24,"seo":1591,"stem":25,"__hash__":1592},"docs\u002F1.intro\u002F4.llm-probability.md",{"type":192,"value":193,"toc":1537},"minimark",[194,203,206,209,214,219,224,227,243,247,250,261,283,287,290,301,303,307,311,318,336,340,351,357,367,371,378,446,455,459,468,470,474,485,489,492,496,499,519,532,536,539,543,546,555,559,572,620,629,631,635,639,656,660,672,675,678,681,701,705,718,727,776,779,783,801,804,808,825,827,831,835,849,852,870,874,887,890,904,907,911,922,935,937,941,945,960,963,989,993,1005,1011,1025,1029,1038,1049,1051,1055,1059,1062,1068,1074,1080,1086,1090,1096,1102,1108,1114,1118,1129,1161,1170,1174,1187,1202,1205,1207,1211,1214,1240,1252,1255,1257,1260],[195,196,197,198,202],"p",{},"当你向 ChatGPT、Claude 或任何大语言模型（LLM）提问时，它给出的回答往往流畅、有条理，甚至令人信服。但有时候，它会\"一本正经地胡说八道\"——编造不存在的事实、引用虚假的论文、给出错误的日期。这种现象被称为",[199,200,201],"strong",{},"幻觉（Hallucination）","，它是 LLM 概率本质的固有副产品，而非某种可以简单\"修复\"的 bug。",[195,204,205],{},"要理解为什么 AI 会\"胡说\"，我们需要从底层机制出发，揭开 Next-Token Prediction、Tokenization、采样策略和涌现能力的面纱。",[207,208],"hr",{},[210,211,213],"h2",{"id":212},"一next-token-predictionllm-的核心机制","一、Next-Token Prediction：LLM 的核心机制",[215,216,218],"h3",{"id":217},"_11-一句话概括","1.1 一句话概括",[195,220,221],{},[199,222,223],{},"所有自回归 LLM（如 GPT、Claude、Gemini）的核心任务只有一个：给定前面的文本，预测下一个 token 的概率分布。",[195,225,226],{},"这就是 LLM 的\"心跳\"——一个不断重复的循环：",[228,229,230,234,237,240],"ol",{},[231,232,233],"li",{},"接收当前上下文（你输入的问题 + 已生成的文本）",[231,235,236],{},"计算词汇表中每个 token 的预测概率",[231,238,239],{},"按某种策略选择一个 token",[231,241,242],{},"将选中的 token 追加到上下文，回到步骤 1",[215,244,246],{"id":245},"_12-数学表达","1.2 
数学表达",[195,248,249],{},"从数学上看，生成过程是一个条件概率的链式分解：",[251,252,257],"pre",{"className":253,"code":255,"language":256},[254],"language-text","P(x₁, x₂, ..., xₙ) = P(x₁) × P(x₂|x₁) × P(x₃|x₁,x₂) × ... × P(xₙ|x₁,...,xₙ₋₁)\n","text",[258,259,255],"code",{"__ignoreMap":260},"",[195,262,263,264,267,268,271,272],{},"其中每个 ",[258,265,266],{},"P(xₜ | x\u003Cₜ)"," 都是模型在当前时刻对下一个 token 的概率预测。模型内部通过 Transformer 架构的掩码自注意力（Masked Self-Attention）",[199,269,270],{},"和","前馈网络（Feed-Forward Networks）来计算这个分布，最终通过 Softmax 层将原始分数（logits）归一化为概率。",[273,274,275],"sup",{},[276,277,282],"a",{"href":278,"ariaDescribedBy":279,"dataFootnoteRef":260,"id":281},"#user-content-fn-1",[280],"footnote-label","user-content-fnref-1","1",[215,284,286],{"id":285},"_13-直观类比","1.3 直观类比",[195,288,289],{},"想象一个超级 autocomplete（自动补全）。当你在手机上打字时，输入法会根据你已输入的内容，预测下一个最可能的字或词。LLM 做的本质上就是这件事——只不过它的\"训练数据\"是整个互联网上的文本，它的\"参数\"有数百亿甚至数千亿个，它的预测能力远超任何输入法。",[195,291,292,293],{},"但关键区别在于：LLM 的能力源于统计学习，它通过预测下一个 token 的概率分布来生成文本。",[273,294,295],{},[276,296,300],{"href":297,"ariaDescribedBy":298,"dataFootnoteRef":260,"id":299},"#user-content-fn-2",[280],"user-content-fnref-2","2",[207,302],{},[210,304,306],{"id":305},"二token-是什么文本的数字化切片","二、Token 是什么：文本的数字化切片",[215,308,310],{"id":309},"_21-token-的本质","2.1 Token 的本质",[195,312,313,314,317],{},"LLM 不直接处理文字，它处理的是 ",[199,315,316],{},"token","——文本被分词器（Tokenizer）切分后的最小单元。一个 token 可以是：",[319,320,321,324,327,330,333],"ul",{},[231,322,323],{},"一个完整的英文单词（如 \"cat\"）",[231,325,326],{},"一个单词的一部分（如 \"play\" + \"ing\"）",[231,328,329],{},"一个中文字符或字符组合",[231,331,332],{},"一个标点符号",[231,334,335],{},"甚至一段代码片段",[215,337,339],{"id":338},"_22-分词算法bpe-与-sentencepiece","2.2 分词算法：BPE 与 SentencePiece",[195,341,342,343],{},"现代 LLM 主要使用两种子词（subword）分词算法：",[273,344,345],{},[276,346,350],{"href":347,"ariaDescribedBy":348,"dataFootnoteRef":260,"id":349},"#user-content-fn-3",[280],"user-content-fnref-3","3",[195,352,353,356],{},[199,354,355],{},"Byte-Pair Encoding（BPE）","：从字符级别开始，迭代合并训练数据中最频繁的相邻字符对。例如，如果 \"th\" 
在语料中频繁出现，它就会被合并为一个独立的 token。GPT 系列模型使用的就是 BPE。",[195,358,359,362,363,366],{},[199,360,361],{},"SentencePiece（一种无监督文本分词器）","：直接操作原始文本的 Unicode 字节流，不依赖空格预分词。它将空格也视为一个可学习的 token（通常用 ",[258,364,365],{},"▁"," 表示），因此对多语言和噪声文本更鲁棒。T5、LLaMA 等模型采用此方案。",[215,368,370],{"id":369},"_23-不同语言的-token-差异","2.3 不同语言的 Token 差异",[195,372,373,374,377],{},"这是许多中文用户容易忽视的一点：",[199,375,376],{},"不同语言的 token 效率差异巨大","。",[379,380,381,397],"table",{},[382,383,384],"thead",{},[385,386,387,391,394],"tr",{},[388,389,390],"th",{},"语言",[388,392,393],{},"示例文本",[388,395,396],{},"大致 Token 数",[398,399,400,411,422,433],"tbody",{},[385,401,402,406,409],{},[403,404,405],"td",{},"英语",[403,407,408],{},"\"Hello world\"",[403,410,300],{},[385,412,413,416,419],{},[403,414,415],{},"中文",[403,417,418],{},"\"你好世界\"",[403,420,421],{},"4-8",[385,423,424,427,430],{},[403,425,426],{},"日语",[403,428,429],{},"\"こんにちは\"",[403,431,432],{},"5-10",[385,434,435,438,443],{},[403,436,437],{},"代码",[403,439,440],{},[258,441,442],{},"function main()",[403,444,445],{},"3-5",[195,447,448,449],{},"中文通常比英文消耗更多 token，这意味着：同样的上下文窗口，中文能容纳的内容更少；同样的输出长度，中文的推理成本更高。这是因为主流分词器的训练语料以英文为主，对中文的压缩效率较低。",[273,450,451],{},[276,452,350],{"href":347,"ariaDescribedBy":453,"dataFootnoteRef":260,"id":454},[280],"user-content-fnref-3-2",[215,456,458],{"id":457},"_24-词汇表大小","2.4 词汇表大小",[195,460,461,462],{},"典型 LLM 的词汇表大小在 30,000 到 100,000 个 token 之间。词汇表越大，单个序列需要的 token 数越少（更好的压缩），但嵌入层（Embedding Layer）的参数量和内存占用也越大。这是一个需要权衡的设计决策。",[273,463,464],{},[276,465,350],{"href":347,"ariaDescribedBy":466,"dataFootnoteRef":260,"id":467},[280],"user-content-fnref-3-3",[207,469],{},[210,471,473],{"id":472},"三概率分布与采样策略控制创造力的旋钮","三、概率分布与采样策略：控制\"创造力\"的旋钮",[195,475,476,477],{},"当模型计算出下一个 token 的概率分布后，如何选择具体的 token？这就是**解码策略（Decoding 
Strategy）**的作用。不同的策略在\"确定性\"和\"多样性\"之间做出不同权衡。",[273,478,479],{},[276,480,484],{"href":481,"ariaDescribedBy":482,"dataFootnoteRef":260,"id":483},"#user-content-fn-4",[280],"user-content-fnref-4","4",[215,486,488],{"id":487},"_31-greedy-decoding贪心解码","3.1 Greedy Decoding（贪心解码）",[195,490,491],{},"每次都选择概率最高的 token。优点是简单、确定性强；缺点是输出容易陷入重复、缺乏变化，像是一个只会说\"标准答案\"的机器人。",[215,493,495],{"id":494},"_32-temperature温度","3.2 Temperature（温度）",[195,497,498],{},"Temperature 是一个控制概率分布\"尖锐程度\"的参数：",[319,500,501,507,513],{},[231,502,503,506],{},[199,504,505],{},"Temperature → 0","：分布变得极度尖锐，接近贪心解码，输出高度确定",[231,508,509,512],{},[199,510,511],{},"Temperature = 1","：保持原始概率分布不变",[231,514,515,518],{},[199,516,517],{},"Temperature > 1","：分布被\"压平（flatten）\"，低概率 token 获得更多机会，输出更随机、更有\"创意\"",[195,520,521,522,525,526],{},"数学上，Temperature 通过对 logits 进行缩放来实现：",[258,523,524],{},"logits' = logits \u002F temperature","。温度越低，高概率 token 的优势越明显；温度越高，概率分布越均匀。",[273,527,528],{},[276,529,484],{"href":481,"ariaDescribedBy":530,"dataFootnoteRef":260,"id":531},[280],"user-content-fnref-4-2",[215,533,535],{"id":534},"_33-top-k-采样","3.3 Top-k 采样",[195,537,538],{},"只从概率最高的 k 个 token 中采样，其余 token 的概率置为零。例如 Top-50 意味着模型只在最可能的 50 个词中选择。这避免了选中极度不可能的\"怪词\"，同时保留了一定的随机性。",[215,540,542],{"id":541},"_34-top-pnucleus-sampling核采样","3.4 Top-p（Nucleus Sampling，核采样）",[195,544,545],{},"比 Top-k 更动态的策略：从累积概率达到阈值 p 的最小 token 集合中采样。例如 Top-p=0.9 意味着只考虑概率累积到 90% 的那些 token，无论这个集合包含 10 个还是 1000 个 token。",[195,547,548,549],{},"Top-p 的优势在于自适应：当模型对下一个词\"很确定\"时（某个词概率高达 95%），集合可能只包含 1-2 个词；当模型\"不确定\"时，集合会自动扩大，纳入更多候选。",[273,550,551],{},[276,552,484],{"href":481,"ariaDescribedBy":553,"dataFootnoteRef":260,"id":554},[280],"user-content-fnref-4-3",[215,556,558],{"id":557},"_35-实际调参建议","3.5 实际调参建议",[195,560,561,562,565,566],{},"以下参数建议基于社区实践经验总结，非官方推荐。OpenAI 官方 API 文档建议",[199,563,564],{},"不同时调整 Temperature 和 
Top-p","，通常只调整其中一个参数即可。",[273,567,568],{},[276,569,484],{"href":481,"ariaDescribedBy":570,"dataFootnoteRef":260,"id":571},[280],"user-content-fnref-4-4",[379,573,574,584],{},[382,575,576],{},[385,577,578,581],{},[388,579,580],{},"场景",[388,582,583],{},"推荐参数（二选一）",[398,585,586,598,609],{},[385,587,588,591],{},[403,589,590],{},"代码生成、数学推理",[403,592,593,594,597],{},"Temperature: 0.0-0.3 ",[199,595,596],{},"或"," Top-p: 0.1-0.5",[385,599,600,603],{},[403,601,602],{},"问答、事实检索",[403,604,605,606,608],{},"Temperature: 0.1-0.5 ",[199,607,596],{}," Top-p: 0.5-0.9",[385,610,611,614],{},[403,612,613],{},"创意写作、头脑风暴",[403,615,616,617,619],{},"Temperature: 0.7-1.0 ",[199,618,596],{}," Top-p: 0.9",[621,622,623],"blockquote",{},[195,624,625,628],{},[199,626,627],{},"注意","：不同模型对参数的取值范围不同。例如 Anthropic 模型的 Temperature 范围为 0-1，而 OpenAI 模型支持 0-2。Top-p=1.0 意味着完全取消限制，通常不推荐在生产环境中使用。",[207,630],{},[210,632,634],{"id":633},"四为什么-llm-会幻觉概率机制的固有副产品","四、为什么 LLM 会\"幻觉\"：概率机制的固有副产品",[215,636,638],{"id":637},"_41-幻觉的定义","4.1 幻觉的定义",[195,640,641,644,645,648],{},[199,642,643],{},"幻觉","指模型生成看似合理但实际错误的内容。OpenAI 在 2025 年发表的研究论文《Why Language Models Hallucinate》中给出了一个核心论断：",[199,646,647],{},"语言模型产生幻觉，是因为标准的训练和评估流程奖励\"猜测\"而非\"承认不确定\"。",[273,649,650],{},[276,651,655],{"href":652,"ariaDescribedBy":653,"dataFootnoteRef":260,"id":654},"#user-content-fn-5",[280],"user-content-fnref-5","5",[215,657,659],{"id":658},"_42-预训练阶段的统计根源","4.2 预训练阶段的统计根源",[195,661,662,663,377,666],{},"预训练阶段，模型通过预测海量文本中的下一个词来学习。关键在于：",[199,664,665],{},"训练数据中只有\"正面例子\"（真实出现的文本），没有\"负面标签\"（标注哪些陈述是假的）",[273,667,668],{},[276,669,655],{"href":652,"ariaDescribedBy":670,"dataFootnoteRef":260,"id":671},[280],"user-content-fnref-5-2",[195,673,674],{},"OpenAI 
的研究者打了一个比方：在图像识别中，如果数百万张猫狗照片被标注为\"猫\"或\"狗\"，算法可以学会可靠分类。但如果标注的是每只宠物的生日——而生日本质上是随机的——那么无论算法多先进，这个任务必然产生错误。",[195,676,677],{},"不过需要注意，与图像识别不同，语言模型中的许多事实（如\"巴黎是法国首都\"）可以从训练数据的高频共现中被准确学习。幻觉主要发生在低频、任意或时效性强的事实上，而非所有事实。",[195,679,680],{},"同理，在预训练中：",[319,682,683,689],{},[231,684,685,688],{},[199,686,687],{},"拼写和括号","遵循一致的模式，随着模型规模增大，这类错误会消失",[231,690,691,694,695],{},[199,692,693],{},"低频的任意事实","（如某人的生日、某篇论文的标题）无法从统计模式中被准确预测，因此必然导致幻觉",[273,696,697],{},[276,698,655],{"href":652,"ariaDescribedBy":699,"dataFootnoteRef":260,"id":700},[280],"user-content-fnref-5-3",[215,702,704],{"id":703},"_43-评估机制的激励扭曲","4.3 评估机制的激励扭曲",[195,706,707,708,711,712],{},"当前主流的模型评估方式加剧了幻觉问题。大多数基准测试只关注",[199,709,710],{},"准确率","（答对的比例），而不区分\"答错\"和\"不回答\"。",[273,713,714],{},[276,715,655],{"href":652,"ariaDescribedBy":716,"dataFootnoteRef":260,"id":717},[280],"user-content-fnref-5-4",[195,719,720,721],{},"想象一场选择题考试：你不知道答案时，盲猜有 1\u002F4 的概率得分；留空则必定零分。在只统计准确率的评分体系下，猜测的模型比诚实的模型\"看起来更好\"。OpenAI 的实验数据显示：",[273,722,723],{},[276,724,655],{"href":652,"ariaDescribedBy":725,"dataFootnoteRef":260,"id":726},[280],"user-content-fnref-5-5",[379,728,729,742],{},[382,730,731],{},[385,732,733,736,739],{},[388,734,735],{},"指标",[388,737,738],{},"gpt-5-thinking-mini",[388,740,741],{},"OpenAI o4-mini",[398,743,744,755,765],{},[385,745,746,749,752],{},[403,747,748],{},"弃权率（不回答）",[403,750,751],{},"52%",[403,753,754],{},"1%",[385,756,757,759,762],{},[403,758,710],{},[403,760,761],{},"22%",[403,763,764],{},"24%",[385,766,767,770,773],{},[403,768,769],{},"错误率（幻觉）",[403,771,772],{},"26%",[403,774,775],{},"75%",[195,777,778],{},"从准确率看，o4-mini 略胜一筹；但它的错误率（幻觉率）是前者的近 3 倍。在不确定时进行策略性猜测虽然能提高准确率，但也会增加错误和幻觉。",[215,780,782],{"id":781},"_44-校准calibration与真相","4.4 校准（Calibration）与真相",[195,784,785,786,793],{},"预训练后的模型通常是",[199,787,788,789,792],{},"校准良好（well-calibrated）",[199,790,791],{},"的——即模型输出的概率与真实正确率大致匹配。但 OpenAI 
的研究者指出：","\"校准良好\"不等于\"说真话\"。一个模型可以非常一致地、自信地犯错。",[273,794,795],{},[276,796,800],{"href":797,"ariaDescribedBy":798,"dataFootnoteRef":260,"id":799},"#user-content-fn-6",[280],"user-content-fnref-6","6",[195,802,803],{},"校准意味着：当模型说\"我有 80% 把握\"时，它确实在约 80% 的情况下是对的。但如果那 20% 的错误恰好发生在关键事实上，对用户来说仍然是灾难。",[215,805,807],{"id":806},"_45-为什么模型不说我不知道","4.5 为什么模型不说\"我不知道\"",[195,809,810,811,814,815,818,819],{},"根本原因在于训练目标的错位。模型被训练来",[199,812,813],{},"最大化训练数据的似然","（即让预测尽可能接近真实文本），而不是",[199,816,817],{},"最大化真实性","。在训练数据中，\"我不知道\"出现的频率远低于直接猜测的尝试。此外，后训练阶段（如基于人类反馈的强化学习，RLHF）如果不对\"错误回答\"和\"弃权\"做差异化惩罚，模型就没有动力去承认不确定。",[273,820,821],{},[276,822,655],{"href":652,"ariaDescribedBy":823,"dataFootnoteRef":260,"id":824},[280],"user-content-fnref-5-6",[207,826],{},[210,828,830],{"id":829},"五为什么-llm-不是思考而是统计模式匹配","五、为什么 LLM 不是\"思考\"而是统计模式匹配",[215,832,834],{"id":833},"_51-随机鹦鹉stochastic-parrot隐喻","5.1 \"随机鹦鹉\"（Stochastic Parrot）隐喻",[195,836,837,838,377,841],{},"2021 年，华盛顿大学的 Emily M. Bender 等学者在论文《On the Dangers of Stochastic Parrots》中提出了这个著名隐喻：",[199,839,840],{},"LLM 就像一只随机鹦鹉——它并不理解所说内容的含义，只是根据统计模式\"鹦鹉学舌\"般地组合词语",[273,842,843],{},[276,844,848],{"href":845,"ariaDescribedBy":846,"dataFootnoteRef":260,"id":847},"#user-content-fn-7",[280],"user-content-fnref-7","7",[195,850,851],{},"这个隐喻的核心论点是：",[319,853,854,864,867],{},[231,855,856,857,860,861],{},"LLM 从训练数据中学习的是",[199,858,859],{},"形式上的共现模式","，而非",[199,862,863],{},"语义上的因果理解",[231,865,866],{},"模型可以流畅地讨论\"重力\"，但它从未\"体验\"过物体下落",[231,868,869],{},"模型可以生成关于\"疼痛\"的描述，但它没有神经系统",[215,871,873],{"id":872},"_52-模式匹配-vs-推理","5.2 模式匹配 vs. 
推理",[195,875,876,877,880,881],{},"当 LLM 解决一道数学题时，它并非在\"推理\"——至少不是人类意义上的推理。它是在匹配训练数据中见过的类似问题的",[199,878,879],{},"文本模式","：",[273,882,883],{},[276,884,300],{"href":297,"ariaDescribedBy":885,"dataFootnoteRef":260,"id":886},[280],"user-content-fnref-2-2",[195,888,889],{},"不过，2025-2026 年的研究表明，大模型内部确实形成了可解释的概念表征和推理路径。o1\u002Fo3 系列通过延长\"思考链\"显著提升了数学和逻辑推理的可靠性。因此，\"统计模式匹配\"与\"推理\"之间的界限可能比我们想象的更模糊——问题的关键不在于 LLM 是否\"真正理解\"，而在于它在什么条件下可靠、在什么条件下会失败。",[228,891,892,895,898,901],{},[231,893,894],{},"识别问题中的关键词和结构（\"如果...那么...\"、\"求解 x\"）",[231,896,897],{},"检索训练数据中相似问题的\"解答模板\"",[231,899,900],{},"按概率填充模板中的变量和中间步骤",[231,902,903],{},"输出看起来最\"合理\"的答案",[195,905,906],{},"这个过程在很多时候能得到正确答案，因为训练数据包含了大量人类解答的数学问题。但一旦遇到训练数据中未覆盖的变体，模式匹配就会失效——而模型仍然会以同样的自信输出错误答案。",[215,908,910],{"id":909},"_53-涌现的推理表象","5.3 涌现的\"推理\"表象",[195,912,913,914],{},"需要承认的是，随着模型规模增大，LLM 确实展现出了一些令人惊讶的能力——链式思考（Chain-of-Thought）、少样本学习（Few-Shot Learning）、甚至某种程度的抽象概括。Anthropic 的研究团队通过**回路追踪（Circuit Tracing）**技术，发现模型内部确实存在可解释的概念表征（如\"大小\"、\"相反\"等概念的特征激活）。",[273,915,916],{},[276,917,921],{"href":918,"ariaDescribedBy":919,"dataFootnoteRef":260,"id":920},"#user-content-fn-8",[280],"user-content-fnref-8","8",[195,923,924,925,928,929],{},"但这并不改变根本事实：",[199,926,927],{},"这些能力是从统计学习中涌现出来的，而非通过符号逻辑或因果理解获得的","。正如 Anthropic 的研究者所言，理解 LLM 的挑战类似于生物学理解生物体——虽然基本进化原理简单，但产生的机制极其复杂。",[273,930,931],{},[276,932,921],{"href":918,"ariaDescribedBy":933,"dataFootnoteRef":260,"id":934},[280],"user-content-fnref-8-2",[207,936],{},[210,938,940],{"id":939},"六涌现能力从何而来","六、涌现能力从何而来",[215,942,944],{"id":943},"_61-什么是涌现能力","6.1 什么是涌现能力",[195,946,947,948,951,952],{},"Google Research 在 2022 年的论文《Emergent Abilities of Large Language 
Models》中定义：",[199,949,950],{},"涌现能力是指在小模型中不存在、但在大模型中出现的能力","。这类能力无法通过外推小模型的性能来预测。",[273,953,954],{},[276,955,959],{"href":956,"ariaDescribedBy":957,"dataFootnoteRef":260,"id":958},"#user-content-fn-9",[280],"user-content-fnref-9","9",[195,961,962],{},"典型的涌现能力包括：",[319,964,965,971,977,983],{},[231,966,967,970],{},[199,968,969],{},"上下文学习（In-Context Learning）","：从提示中的几个例子学习新任务",[231,972,973,976],{},[199,974,975],{},"链式思考推理（Chain-of-Thought Reasoning）","：通过生成中间步骤解决复杂问题",[231,978,979,982],{},[199,980,981],{},"指令遵循（Instruction Following）","：理解并执行自然语言指令",[231,984,985,988],{},[199,986,987],{},"多语言翻译","：在从未显式训练翻译任务的情况下进行跨语言转换",[215,990,992],{"id":991},"_62-涌现的三大驱动力","6.2 涌现的三大驱动力",[195,994,995,998,999],{},[199,996,997],{},"规模（Scale）","：模型参数量从百万级增长到千亿级，带来了质的变化。更大的模型能够学习更复杂的表征，捕捉更长距离的依赖关系。但涌现并非线性——它往往在特定的\"临界点\"突然发生。",[273,1000,1001],{},[276,1002,959],{"href":956,"ariaDescribedBy":1003,"dataFootnoteRef":260,"id":1004},[280],"user-content-fnref-9-2",[195,1006,1007,1010],{},[199,1008,1009],{},"预训练数据（Data）","：训练数据的数量、质量和多样性直接决定了模型能学到什么模式。现代 LLM 通常在数万亿 token 的文本上训练，涵盖书籍、网页、代码、论文等多种来源。数据的\"覆盖度\"决定了模型能处理的任务范围。",[195,1012,1013,1016,1017],{},[199,1014,1015],{},"架构（Architecture）","：Transformer 架构的自注意力机制（Self-Attention）允许模型在任意两个 token 之间建立直接联系，无论它们在文本中相距多远。这种全局上下文能力是涌现能力的技术基础。2017 年的论文《Attention Is All You Need》奠定了这一架构，它通过 Query-Key-Value 机制计算 token 间的相关性权重，实现了高效的并行处理。",[273,1018,1019],{},[276,1020,1024],{"href":1021,"ariaDescribedBy":1022,"dataFootnoteRef":260,"id":1023},"#user-content-fn-10",[280],"user-content-fnref-10","10",[215,1026,1028],{"id":1027},"_63-涌现的争议","6.3 涌现的争议",[195,1030,1031,1032],{},"并非所有研究者都认同\"涌现\"是一个真实的、不可预测的现象。2023 年，Schaeffer 等人在论文《Are Emergent Abilities of Large Language Models a Mirage?》中提出：所谓的\"涌现\"可能只是评估指标的非线性变化所致——当使用离散的 pass\u002Ffail 评估指标时，能力看起来是\"突变\"的；但如果改用连续的评估指标（如 token 
编辑距离），能力的提升实际上是平滑的。",[273,1033,1034],{},[276,1035,959],{"href":956,"ariaDescribedBy":1036,"dataFootnoteRef":260,"id":1037},[280],"user-content-fnref-9-3",[195,1039,1040,1041,1044,1045,1048],{},"这一质疑的核心论点是：涌现并非模型本身的属性，而是",[199,1042,1043],{},"评估方式的人为产物","。如果换一种度量方式，许多\"涌现能力\"就会呈现渐进式增长。但即便如此，",[199,1046,1047],{},"大模型确实展现出了小模型不具备的复杂行为","，这一点是共识——争议的焦点在于这种能力的出现方式是否\"不可预测\"，而非能力本身是否真实。",[207,1050],{},[210,1052,1054],{"id":1053},"七对开发者的实际意义","七、对开发者的实际意义",[215,1056,1058],{"id":1057},"_71-正确设定预期","7.1 正确设定预期",[195,1060,1061],{},"理解 LLM 的概率本质后，开发者应该建立以下认知：",[195,1063,1064,1067],{},[199,1065,1066],{},"LLM 不是数据库","：它不会\"查找\"事实，而是\"生成\"看似合理的陈述。不要把它当作搜索引擎或知识库使用。",[195,1069,1070,1073],{},[199,1071,1072],{},"LLM 不是计算器","：对于精确计算，它可能给出错误答案。复杂数学应交给专门的计算工具。",[195,1075,1076,1079],{},[199,1077,1078],{},"LLM 不是法律\u002F医疗\u002F金融顾问","：在受监管领域，它的输出必须经过专业人士审核。",[195,1081,1082,1085],{},[199,1083,1084],{},"置信度不等于正确性","：模型输出得越自信，不代表它越正确。校准良好的模型仍然可能自信地犯错。",[215,1087,1089],{"id":1088},"_72-降低幻觉的工程实践","7.2 降低幻觉的工程实践",[195,1091,1092,1095],{},[199,1093,1094],{},"检索增强生成（RAG）","：让模型在生成回答前先检索相关文档，将生成约束在检索到的内容范围内。这是目前最有效的幻觉抑制手段之一。",[195,1097,1098,1101],{},[199,1099,1100],{},"工具使用（Tool Use）","：将计算、查询、验证等任务交给外部工具（如计算器、数据库、API），让 LLM 专注于理解和编排。",[195,1103,1104,1107],{},[199,1105,1106],{},"提示工程（Prompt Engineering）","：在提示中明确要求模型\"如果不确定，请说不知道\"、\"请基于提供的资料回答\"。虽然不能完全消除幻觉，但可以降低其发生率。",[195,1109,1110,1113],{},[199,1111,1112],{},"温度调低","：对于需要高准确性的任务，将 Temperature 设为 0 或接近 0，减少随机性带来的错误。",[215,1115,1117],{"id":1116},"_73-何时需要人工审查","7.3 何时需要人工审查",[195,1119,1120,1121],{},"根据 Comet 公司博客中的实践建议（注：Comet 为 ML 
实验追踪平台提供商，该文章包含其产品推广内容），以下场景必须引入人工审查（Human-in-the-Loop）：",[273,1122,1123],{},[276,1124,1128],{"href":1125,"ariaDescribedBy":1126,"dataFootnoteRef":260,"id":1127},"#user-content-fn-11",[280],"user-content-fnref-11","11",[319,1130,1131,1137,1143,1149,1155],{},[231,1132,1133,1136],{},[199,1134,1135],{},"高风险决策","：医疗诊断、法律建议、金融投资、招聘评估等",[231,1138,1139,1142],{},[199,1140,1141],{},"事实敏感场景","：新闻报道、学术研究、历史陈述等",[231,1144,1145,1148],{},[199,1146,1147],{},"品牌和安全敏感场景","：对外发布的客服回复、社交媒体内容、营销文案等",[231,1150,1151,1154],{},[199,1152,1153],{},"边缘案例","：模型训练数据中未充分覆盖的罕见场景",[231,1156,1157,1160],{},[199,1158,1159],{},"合规要求","：受监管行业通常要求保留人工审核记录",[195,1162,1163,1164],{},"人工审查不是\"永远当 AI 的保姆\"（babysit the AI forever），而是\"教会系统如何自我评估\"（teach the system how to evaluate itself）。通过结构化的人类反馈（评分、注释、纠正），可以持续改进提示、评估指标和系统行为。",[273,1165,1166],{},[276,1167,1128],{"href":1125,"ariaDescribedBy":1168,"dataFootnoteRef":260,"id":1169},[280],"user-content-fnref-11-2",[215,1171,1173],{"id":1172},"_74-评估策略的反思","7.4 评估策略的反思",[195,1175,1176,1177,1180,1181],{},"OpenAI 的研究者呼吁：",[199,1178,1179],{},"主流评估基准需要从\"准确率优先\"转向\"惩罚过度自信\"","。具体建议包括：",[273,1182,1183],{},[276,1184,655],{"href":652,"ariaDescribedBy":1185,"dataFootnoteRef":260,"id":1186},[280],"user-content-fnref-5-7",[319,1188,1189,1192,1195],{},[231,1190,1191],{},"正确答案：得分 +1",[231,1193,1194],{},"\"我不知道\"\u002F弃权：得分 0",[231,1196,1197,1198,1201],{},"错误答案：得分负值（如 ",[258,1199,1200],{},"-t\u002F(1-t)","，其中 t 为置信度阈值）",[195,1203,1204],{},"只有当评估机制奖励谦逊、惩罚盲猜时，模型才会有动力学会\"知之为知之，不知为不知\"。",[207,1206],{},[210,1208,1210],{"id":1209},"八总结","八、总结",[195,1212,1213],{},"LLM 的\"胡说\"不是 bug，而是其概率本质的必然表现。理解这一点，有助于我们：",[228,1215,1216,1222,1228,1234],{},[231,1217,1218,1221],{},[199,1219,1220],{},"建立合理预期","——LLM 是强大的文本生成器，但不是全知全能的 oracle",[231,1223,1224,1227],{},[199,1225,1226],{},"选择正确的使用场景","——创意生成、头脑风暴、文本润色是它的强项；精确事实、关键决策需要额外验证",[231,1229,1230,1233],{},[199,1231,1232],{},"设计更可靠的系统","——通过 RAG、工具使用、人工审查等手段，将 LLM 
的能力与人类的判断力结合",[231,1235,1236,1239],{},[199,1237,1238],{},"推动评估进步","——倡导更合理的评估标准，奖励诚实而非盲猜",[195,1241,1242,1243,1246],{},"正如 OpenAI 的研究者所言：",[199,1244,1245],{},"\"预训练产生的是预测性语言模型——擅长预测文本，甚至能把语法搞对。但后训练应该产生的是生成性模型——不幻觉，知道何时弃权。\"",[273,1247,1248],{},[276,1249,800],{"href":797,"ariaDescribedBy":1250,"dataFootnoteRef":260,"id":1251},[280],"user-content-fnref-6-2",[195,1253,1254],{},"在 AI 能力飞速提升的今天，理解它的局限性，比盲目崇拜它的能力更加重要。",[207,1256],{},[210,1258,1259],{"id":1259},"参考来源",[1261,1262,1265,1270],"section",{"className":1263,"dataFootnotes":260},[1264],"footnotes",[210,1266,1269],{"className":1267,"id":280},[1268],"sr-only","Footnotes",[228,1271,1272,1289,1309,1336,1370,1426,1442,1455,1475,1507,1516],{},[231,1273,1275,1276,1281,1282],{"id":1274},"user-content-fn-1","OpenAI, \"How LLMs work.\" OpenAI API Docs. ",[276,1277,1278],{"href":1278,"rel":1279},"https:\u002F\u002Fplatform.openai.com\u002Fdocs\u002Fguides\u002Fprompt-engineering",[1280],"nofollow"," ",[276,1283,1288],{"href":1284,"ariaLabel":1285,"className":1286,"dataFootnoteBackref":260},"#user-content-fnref-1","Back to reference 1",[1287],"data-footnote-backref","↩",[231,1290,1292,1293,1281,1297,1281,1302],{"id":1291},"user-content-fn-2","Anthropic, \"Claude's capabilities.\" Anthropic Docs. ",[276,1294,1295],{"href":1295,"rel":1296},"https:\u002F\u002Fcode.claude.com\u002Fdocs\u002Fen\u002Fabout-claude\u002Fmodels",[1280],[276,1298,1288],{"href":1299,"ariaLabel":1300,"className":1301,"dataFootnoteBackref":260},"#user-content-fnref-2","Back to reference 2",[1287],[276,1303,1288,1307],{"href":1304,"ariaLabel":1305,"className":1306,"dataFootnoteBackref":260},"#user-content-fnref-2-2","Back to reference 2-2",[1287],[273,1308,300],{},[231,1310,1312,1313,1281,1317,1281,1322,1281,1329],{"id":1311},"user-content-fn-3","DigitalOcean, \"LLM Tokenizers Simplified: BPE, SentencePiece, and More\". 
",[276,1314,1315],{"href":1315,"rel":1316},"https:\u002F\u002Fwww.digitalocean.com\u002Fcommunity\u002Fconceptual-articles\u002Fllm-tokenizers-bpe-sentencepiece-custom-vs-pretrained",[1280],[276,1318,1288],{"href":1319,"ariaLabel":1320,"className":1321,"dataFootnoteBackref":260},"#user-content-fnref-3","Back to reference 3",[1287],[276,1323,1288,1327],{"href":1324,"ariaLabel":1325,"className":1326,"dataFootnoteBackref":260},"#user-content-fnref-3-2","Back to reference 3-2",[1287],[273,1328,300],{},[276,1330,1288,1334],{"href":1331,"ariaLabel":1332,"className":1333,"dataFootnoteBackref":260},"#user-content-fnref-3-3","Back to reference 3-3",[1287],[273,1335,350],{},[231,1337,1339,1340,1281,1344,1281,1349,1281,1356,1281,1363],{"id":1338},"user-content-fn-4","OpenAI Community, \"Cheat Sheet: Mastering Temperature and Top_p in ChatGPT API.\" ",[276,1341,1342],{"href":1342,"rel":1343},"https:\u002F\u002Fcommunity.openai.com\u002Ft\u002Fcheat-sheet-mastering-temperature-and-top-p-in-chatgpt-api\u002F172683",[1280],[276,1345,1288],{"href":1346,"ariaLabel":1347,"className":1348,"dataFootnoteBackref":260},"#user-content-fnref-4","Back to reference 4",[1287],[276,1350,1288,1354],{"href":1351,"ariaLabel":1352,"className":1353,"dataFootnoteBackref":260},"#user-content-fnref-4-2","Back to reference 4-2",[1287],[273,1355,300],{},[276,1357,1288,1361],{"href":1358,"ariaLabel":1359,"className":1360,"dataFootnoteBackref":260},"#user-content-fnref-4-3","Back to reference 4-3",[1287],[273,1362,350],{},[276,1364,1288,1368],{"href":1365,"ariaLabel":1366,"className":1367,"dataFootnoteBackref":260},"#user-content-fnref-4-4","Back to reference 4-4",[1287],[273,1369,484],{},[231,1371,1373,1374,1378,1379,1281,1384,1281,1391,1281,1398,1281,1405,1281,1412,1281,1419],{"id":1372},"user-content-fn-5","OpenAI, \"Why Language Models Hallucinate\", 2025. 
",[276,1375,1376],{"href":1376,"rel":1377},"https:\u002F\u002Fopenai.com\u002Findex\u002Fwhy-language-models-hallucinate\u002F",[1280]," (论文 arXiv:2509.04664) ",[276,1380,1288],{"href":1381,"ariaLabel":1382,"className":1383,"dataFootnoteBackref":260},"#user-content-fnref-5","Back to reference 5",[1287],[276,1385,1288,1389],{"href":1386,"ariaLabel":1387,"className":1388,"dataFootnoteBackref":260},"#user-content-fnref-5-2","Back to reference 5-2",[1287],[273,1390,300],{},[276,1392,1288,1396],{"href":1393,"ariaLabel":1394,"className":1395,"dataFootnoteBackref":260},"#user-content-fnref-5-3","Back to reference 5-3",[1287],[273,1397,350],{},[276,1399,1288,1403],{"href":1400,"ariaLabel":1401,"className":1402,"dataFootnoteBackref":260},"#user-content-fnref-5-4","Back to reference 5-4",[1287],[273,1404,484],{},[276,1406,1288,1410],{"href":1407,"ariaLabel":1408,"className":1409,"dataFootnoteBackref":260},"#user-content-fnref-5-5","Back to reference 5-5",[1287],[273,1411,655],{},[276,1413,1288,1417],{"href":1414,"ariaLabel":1415,"className":1416,"dataFootnoteBackref":260},"#user-content-fnref-5-6","Back to reference 5-6",[1287],[273,1418,800],{},[276,1420,1288,1424],{"href":1421,"ariaLabel":1422,"className":1423,"dataFootnoteBackref":260},"#user-content-fnref-5-7","Back to reference 5-7",[1287],[273,1425,848],{},[231,1427,1429,1430,1281,1435],{"id":1428},"user-content-fn-6","OpenAI Research, \"Calibration and Truthfulness in Language Models.\" 基于 OpenAI \"Why Language Models Hallucinate\" 研究。 ",[276,1431,1288],{"href":1432,"ariaLabel":1433,"className":1434,"dataFootnoteBackref":260},"#user-content-fnref-6","Back to reference 6",[1287],[276,1436,1288,1440],{"href":1437,"ariaLabel":1438,"className":1439,"dataFootnoteBackref":260},"#user-content-fnref-6-2","Back to reference 6-2",[1287],[273,1441,300],{},[231,1443,1445,1446,1281,1450],{"id":1444},"user-content-fn-7","Bender, E. M., et al. \"On the Dangers of Stochastic Parrots: Can Language Models Be Too Big?\" FAccT 2021. 
",[276,1447,1448],{"href":1448,"rel":1449},"https:\u002F\u002Fs10251.pcdn.co\u002Fpdf\u002F2021-bender-parrots.pdf",[1280],[276,1451,1288],{"href":1452,"ariaLabel":1453,"className":1454,"dataFootnoteBackref":260},"#user-content-fnref-7","Back to reference 7",[1287],[231,1456,1458,1459,1281,1463,1281,1468],{"id":1457},"user-content-fn-8","Anthropic, \"On the Biology of a Large Language Model\", Transformer Circuits, 2025. ",[276,1460,1461],{"href":1461,"rel":1462},"https:\u002F\u002Ftransformer-circuits.pub\u002F2025\u002Fattribution-graphs\u002Fbiology.html",[1280],[276,1464,1288],{"href":1465,"ariaLabel":1466,"className":1467,"dataFootnoteBackref":260},"#user-content-fnref-8","Back to reference 8",[1287],[276,1469,1288,1473],{"href":1470,"ariaLabel":1471,"className":1472,"dataFootnoteBackref":260},"#user-content-fnref-8-2","Back to reference 8-2",[1287],[273,1474,300],{},[231,1476,1478,1479,1483,1484,1281,1488,1281,1493,1281,1500],{"id":1477},"user-content-fn-9","Wei, J., et al. \"Emergent Abilities of Large Language Models.\" arXiv:2206.07682, Google Research, 2022. ",[276,1480,1481],{"href":1481,"rel":1482},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2206.07682",[1280],"; Schaeffer, R., et al. \"Are Emergent Abilities of Large Language Models a Mirage?\" arXiv:2304.15004, 2023. ",[276,1485,1486],{"href":1486,"rel":1487},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2304.15004",[1280],[276,1489,1288],{"href":1490,"ariaLabel":1491,"className":1492,"dataFootnoteBackref":260},"#user-content-fnref-9","Back to reference 9",[1287],[276,1494,1288,1498],{"href":1495,"ariaLabel":1496,"className":1497,"dataFootnoteBackref":260},"#user-content-fnref-9-2","Back to reference 9-2",[1287],[273,1499,300],{},[276,1501,1288,1505],{"href":1502,"ariaLabel":1503,"className":1504,"dataFootnoteBackref":260},"#user-content-fnref-9-3","Back to reference 9-3",[1287],[273,1506,350],{},[231,1508,1510,1511],{"id":1509},"user-content-fn-10","Vaswani, A., et al. 
\"Attention Is All You Need.\" NeurIPS 2017. (Transformer 架构原始论文) ",[276,1512,1288],{"href":1513,"ariaLabel":1514,"className":1515,"dataFootnoteBackref":260},"#user-content-fnref-10","Back to reference 10",[1287],[231,1517,1519,1520,1524,1525,1281,1530],{"id":1518},"user-content-fn-11","Comet, \"Human-in-the-Loop Review Workflows for LLM Applications & Agents\", 2025. ",[276,1521,1522],{"href":1522,"rel":1523},"https:\u002F\u002Fwww.comet.com\u002Fsite\u002Fblog\u002Fhuman-in-the-loop\u002F",[1280]," (注：Comet 为 ML 实验追踪平台提供商，该文章包含其产品推广内容) ",[276,1526,1288],{"href":1527,"ariaLabel":1528,"className":1529,"dataFootnoteBackref":260},"#user-content-fnref-11","Back to reference 11",[1287],[276,1531,1288,1535],{"href":1532,"ariaLabel":1533,"className":1534,"dataFootnoteBackref":260},"#user-content-fnref-11-2","Back to reference 11-2",[1287],[273,1536,300],{},{"title":260,"searchDepth":1538,"depth":1539,"links":1540},1,2,[1541,1547,1553,1560,1567,1572,1577,1583,1584],{"id":212,"depth":1539,"text":213,"children":1542},[1543,1545,1546],{"id":217,"depth":1544,"text":218},3,{"id":245,"depth":1544,"text":246},{"id":285,"depth":1544,"text":286},{"id":305,"depth":1539,"text":306,"children":1548},[1549,1550,1551,1552],{"id":309,"depth":1544,"text":310},{"id":338,"depth":1544,"text":339},{"id":369,"depth":1544,"text":370},{"id":457,"depth":1544,"text":458},{"id":472,"depth":1539,"text":473,"children":1554},[1555,1556,1557,1558,1559],{"id":487,"depth":1544,"text":488},{"id":494,"depth":1544,"text":495},{"id":534,"depth":1544,"text":535},{"id":541,"depth":1544,"text":542},{"id":557,"depth":1544,"text":558},{"id":633,"depth":1539,"text":634,"children":1561},[1562,1563,1564,1565,1566],{"id":637,"depth":1544,"text":638},{"id":658,"depth":1544,"text":659},{"id":703,"depth":1544,"text":704},{"id":781,"depth":1544,"text":782},{"id":806,"depth":1544,"text":807},{"id":829,"depth":1539,"text":830,"children":1568},[1569,1570,1571],{"id":833,"depth":1544,"text":834},{"id":872,"depth":1544,"text":
873},{"id":909,"depth":1544,"text":910},{"id":939,"depth":1539,"text":940,"children":1573},[1574,1575,1576],{"id":943,"depth":1544,"text":944},{"id":991,"depth":1544,"text":992},{"id":1027,"depth":1544,"text":1028},{"id":1053,"depth":1539,"text":1054,"children":1578},[1579,1580,1581,1582],{"id":1057,"depth":1544,"text":1058},{"id":1088,"depth":1544,"text":1089},{"id":1116,"depth":1544,"text":1117},{"id":1172,"depth":1544,"text":1173},{"id":1209,"depth":1539,"text":1210},{"id":1259,"depth":1539,"text":1259},"为什么 AI 会\"胡说\"","md",null,{"date":1589},"2026-04-26",true,{"title":23,"description":1585},"AXkqFe3KD3lcHZv6sLX3LoIhZfieRsO0W9oyIQx9Ha0",[1594,1595],{"title":19,"path":20,"stem":21,"description":260,"children":-1},{"title":27,"path":28,"stem":29,"description":1596,"children":-1},"Planning + Memory + Tool Use",1777395307934]