[{"data":1,"prerenderedAt":376},["ShallowReactive",2],{"$fgukOamtKU1RtUiMFsqdObttmqPPQz0uc7bl_gj_LyX0":3,"$foAB1YA1-bbXkA779N6-rkR7HmLwt_jzIT3hO-7OCRDg":245,"article-31":375},{"code":4,"msg":5,"data":6},0,"",{"category":7,"tag":11,"popular":19,"latest":86,"banner":126,"list":151,"cache":244},[8,9,10],"Agent","OpenAI","LLM",[8,12,13,14,9,10,15,16,17,18],"Google","Nvidia","Claude","DeepSeek","OCR","Chat","Generator",[20,29,37,45,54,62,70,79],{"id":21,"publish_date":22,"is_original":23,"collection":5,"cover_url":24,"cover_url_1_1":25,"title":26,"summary":27,"author":28},411,"2023-09-10",1,"article_res/cover/451ef50c225a8dc61c4336506794d13b.jpeg","article_res/cover/3ba9dc7a72f87d40b20fc2d225289ee3.jpeg","Idealism","Reality is created by the mind, we can change our reality by changing our mind. - Plato","Renee's Entrepreneurial Journey",{"id":30,"publish_date":31,"is_original":23,"collection":32,"cover_url":33,"cover_url_1_1":34,"title":35,"summary":36,"author":28},108,"2024-12-07","#LLM #AGI #AI Agent","article_res/cover/0039044422e4ec9f61c18e8ee1693bb0.jpeg","article_res/cover/4220971b108a91d21407d87bb02fbaa6.jpeg","Freysa.ai: The World's First Adversarial AI Agent Game","说服 Freysa 把钱包里的钱都拿出来",{"id":38,"publish_date":39,"is_original":23,"collection":40,"cover_url":41,"cover_url_1_1":42,"title":43,"summary":44,"author":28},12,"2025-03-09","#Oxford #Reasoning #LLM #Tool Use","article_res/cover/d448e9b3617a0b5302e1bd10c438bca9.jpeg","article_res/cover/864a468f9cc4c9317efadb3811909888.jpeg","Agentic Reasoning Framework - Significantly enhance the reasoning ability of LLMs through the integration of external tools using agents","Agentic Reasoning: Reasoning LLMs with Tools for Deep Research",{"id":46,"publish_date":47,"is_original":4,"collection":48,"cover_url":49,"cover_url_1_1":50,"title":51,"summary":52,"author":53},480,"2023-04-14","#Stable 
Diffusion","article_res/cover/0bdbe7cb1de4a78e54536e5d9afa7ec9.jpeg","article_res/cover/b3d6ffec0608dcfaf18c5a69906d1490.jpeg","【AIGC Learning】Generate Prompts Using Word Graphs - Stable Diffusion Web UI Series 13","AI will become a powerful tool in education, transforming the way we learn and deliver instruction.  \n- Reid Hoffman","--",{"id":55,"publish_date":56,"is_original":4,"collection":57,"cover_url":58,"cover_url_1_1":59,"title":60,"summary":61,"author":28},413,"2023-09-08","#Neuroscience","article_res/cover/74f8302d78a23d9430f22171eae136b6.jpeg","article_res/cover/87ca08af81bb304746be5261160964c0.jpeg","Can machines be conscious?","Do we have an ethical obligation to not turn off conscious machines? Would turning them off be murder? No. I don't lose any sleep over unplugging a conscious machine.\n- Jeff Hawkins, \"A Thousand Brains\"",{"id":63,"publish_date":64,"is_original":23,"collection":65,"cover_url":66,"cover_url_1_1":67,"title":68,"summary":69,"author":28},178,"2024-09-09","#Entrepreneurship","article_res/cover/a7224f025b55d1820408085faef63079.jpeg","article_res/cover/11a9995b096cbf64465ef01b8673b154.jpeg","37signals company","This damn sense of relaxation",{"id":71,"publish_date":72,"is_original":4,"collection":73,"cover_url":74,"cover_url_1_1":75,"title":76,"summary":77,"author":78},460,"2023-05-12","#Google","article_res/cover/b970687b12faa52da976f91248c2aa7b.jpeg","article_res/cover/d1e71b52cfd2c63bc6e71f3e85ff135c.jpeg","Learn what BRC-20 and Ordinals are using Google Bard","Ordinals - a new protocol that allows users to store arbitrary data on the Bitcoin blockchain","Google Bard mainly writes",{"id":80,"publish_date":81,"is_original":23,"collection":5,"cover_url":82,"cover_url_1_1":83,"title":84,"summary":85,"author":28},309,"2024-03-26","article_res/cover/9877f95894ee88532d0e6012c23a2df3.jpeg","article_res/cover/20092164ddc109ce6ae56b1984246751.jpeg","Learning the Cancun Upgrade with lepton and perplexity","Building a quick 
conversation-based search demo with Lepton AI.",[87,95,103,111,119],{"id":88,"publish_date":89,"is_original":23,"collection":90,"cover_url":91,"cover_url_1_1":92,"title":93,"summary":94,"author":28},627,"2025-03-20","#AI Avatar #AI Video Generation","article_res/cover/d95481358f73924989f8c4ee9c75d1c8.jpeg","article_res/cover/b74bc0fab01f8b6a6aa87696c0c3ed8b.jpeg","DisPose: Generating Animated Videos by Driving Video with Reference Images","DisPose is a controllable human image animation method that enhances video generation.",{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},626,"2025-03-21","#Deep Dive into LLMs #LLM #RL #Andrej Karpathy #AlphaGo","article_res/cover/446553a5c8f8f2f07d97b20eaee84e56.jpeg","article_res/cover/e6c2823409c9b34624064b9acbaca6f1.jpeg","AlphaGo and the Power of Reinforcement Learning - Andrej Karpathy's Deep Dive on LLMs (Part 9)","Simply learning from humans will never surpass human capabilities.",{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},625,"2025-03-22","#Deep Dive into LLMs #LLM #RL #RLHF #Andrej Karpathy","article_res/cover/8da81d38b1e5cf558a164710fd8a5389.jpeg","article_res/cover/96f028d76c362a99a0dd56389e8f7a9b.jpeg","Reinforcement Learning from Human Feedback (RLHF) - Andrej Karpathy's Deep Dive on LLMs (Part 10)","Fine-Tuning Language Models from Human Preferences",{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},624,"2025-03-23","#Deep Dive into LLMs #LLM #Andrej Karpathy #AI Agent #MMM","article_res/cover/a5e7c3d48bb09109684d6513287c661d.jpeg","article_res/cover/d3f22b7c0ab8d82fd2da457a299e0773.jpeg","The Future of Large Language Models - Andrej Karpathy's In-Depth Explanation of LLM (Part 11)","preview of things to 
come",{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},623,"#Google #Voe #AI Video Generation","article_res/cover/c44062fea0f336c2b96b3928292392c2.jpeg","article_res/cover/a041041c69092ad3db191c5bf3ff981b.jpeg","Trial of Google's video generation model Veo 2","Our state-of-the-art video generation model",[127,135,143],{"id":128,"publish_date":129,"is_original":23,"collection":130,"cover_url":131,"cover_url_1_1":132,"title":133,"summary":134,"author":28},300,"2024-04-16","#AI in Science #AGI","article_res/cover/6bf01e793e0f33e848572412eebdf9b0.jpeg","article_res/cover/91a5ee21dafecb914fabeb9430d46ec1.jpeg","Would Einstein lose his job - AI and Quantum Computing: A Glimpse into the Near Future","So Einstein's job is still safe.",{"id":136,"publish_date":137,"is_original":23,"collection":138,"cover_url":139,"cover_url_1_1":140,"title":141,"summary":142,"author":28},101,"2024-12-14","#Nvidia #AI 3D Generator","article_res/cover/693e07c85980c5c0c8fde3f037733f23.jpeg","article_res/cover/9ea8edff2d5d303ff3fffff3f6f9c3d9.jpeg","NVIDIA's open-source 3D project LLaMA-Mesh","LLaMA-Mesh: Unifying 3D Mesh Generation with Language Models",{"id":144,"publish_date":145,"is_original":23,"collection":146,"cover_url":147,"cover_url_1_1":148,"title":149,"summary":150,"author":28},131,"2024-11-10","#OpenAI","article_res/cover/87f8ed353ce39f31960e7cdfaf075a35.jpeg","article_res/cover/f597a63935f5cd32e484b4aadd6019e8.jpeg","ChatGPT has launched the Search function","Get fast, timely answers with links to relevant web 
sources.",{"big":152,"small":214},[153,181],{"title":154,"list":155},"AGENT",[156,157,165,173],{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":158,"publish_date":159,"is_original":23,"collection":160,"cover_url":161,"cover_url_1_1":162,"title":163,"summary":164,"author":28},622,"2025-03-24","#OWL #AI Agent #MAS #MCP #CUA","article_res/cover/cb50ca7f2bf4d1ed50202d7406e1c19a.jpeg","article_res/cover/4aa7aa3badfacf3cc84121334f1050dd.jpeg","OWL: Multi-agent collaboration","OWL: Optimized Workforce Learning for General Multi-Agent Assistance in Real-World Task Automation",{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},620,"2025-03-26","#LLM #Google #Gemini #AI Agent","article_res/cover/53751a6dbbe990b1eb0b63f3b062aed4.jpeg","article_res/cover/031344981f0a212ff82d1f3a64aa5756.jpeg","Gemini 2.5 Pro, claimed to be far ahead of the competition, has been released with great fanfare: comprehensively surpassing other LLMs and topping the global rankings","Gemini 2.5: Our most intelligent AI model",{"id":174,"publish_date":175,"is_original":23,"collection":176,"cover_url":177,"cover_url_1_1":178,"title":179,"summary":180,"author":28},616,"2025-03-29","#MAS #AI Agent #AI Coder #MetaGPT #MGX","article_res/cover/9dcd702ad2035902e5e77967c34a1f1e.jpeg","article_res/cover/0a97fc4a922753c8f46ff38792020df8.jpeg","MGX - An automated website-building platform composed of multiple AI Agents","Your 24/7 AI Team | Dream, Chat, Create.",{"title":182,"list":183},"OPENAI",[184,191,199,206],{"id":185,"publish_date":167,"is_original":23,"collection":186,"cover_url":187,"cover_url_1_1":188,"title":189,"summary":190,"author":28},619,"#OpenAI #AI Image Generator #4o #MMM #AR Transformer","article_res/cover/2faffc97fcecf3151552cb0fd3206d89.jpeg","article_res/cover/1133cb4948af44cee2e7fbe79efb69e5.jpeg","The native 
image function of GPT-4o is officially launched","Introducing 4o Image Generation",{"id":192,"publish_date":193,"is_original":4,"collection":194,"cover_url":195,"cover_url_1_1":196,"title":197,"summary":198,"author":28},434,"2023-07-15","#Anthropic #OpenAI #Google #AI Code Generator #Claude","article_res/cover/e1b6f600a2b9f262a4392684e5f2ce25.jpeg","article_res/cover/6e1772e83f78f9a351ab23d3e414adee.jpeg","Latest Updates on Google Bard /Anthropic Claude2 / ChatGPT Code Interpreter","We want our models to use their programming skills to provide more natural interfaces to the basic functions of our computers.  \n - OpenAI",{"id":200,"publish_date":201,"is_original":4,"collection":146,"cover_url":202,"cover_url_1_1":203,"title":204,"summary":205,"author":28},417,"2023-08-24","article_res/cover/bccf897d50a88b18364e35f7466387e0.jpeg","article_res/cover/2f871085c1073717c1703ae86e18056f.jpeg","The GPT-3.5 Turbo fine-tuning (fine-tuning function) has been released～","Developers can now bring their own data to customize GPT-3.5 Turbo for their use cases.",{"id":207,"publish_date":208,"is_original":4,"collection":209,"cover_url":210,"cover_url_1_1":211,"title":212,"summary":213,"author":28},407,"2023-09-22","#OpenAI #AI Image Generator","article_res/cover/c59005e903d35cfc32346e2756e2728a.jpeg","article_res/cover/ba011d265e6d84b5c8cb6fd6b757b6cc.jpeg","Dall-E 3","DALL·E 3 understands significantly more nuance and detail, allowing you to easily translate your ideas into 
images.",[215,221,241],{"title":10,"list":216},[217,218,219,220],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":223},"GOOGLE",[224,225,226,234],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},615,"2025-03-30","#AI Researcher #AI Science #HKU #Google #AI Agent","article_res/cover/21fadf906067714bb0db31ae13a77c15.jpeg","article_res/cover/2697999a72bd26b22e85f0e92936d3ed.jpeg","AI-Researcher: LLM-driven fully automated scientific research assistant","AI-Researcher: Fully-Automated Scientific Discovery with LLM Agents  \nOpen-Sourced Alternative to Google AI Co-Scientist",{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},463,"2023-05-09","article_res/cover/89800f207723acdb55fc53bf999ebdc9.jpeg","article_res/cover/5764f369b4accd8f83e94aa4c077a175.jpeg","The Smallville sandbox world - A town with 25 virtual residents","Believable proxies of human behavior can empower interactive apps: Immersive environment, Rehearsal space, Prototyping 
tool",{"title":242,"list":243},"NVIDIA",[],true,{"code":4,"msg":5,"data":246},{"id":247,"publish_date":248,"is_original":23,"collection":249,"articles_id":250,"cover_url":251,"cover_url_1_1":252,"title":253,"summary":254,"author":28,"content":255,"popular":256,"list":316,"category":373,"tag":374},31,"2025-02-22","#Deep Dive into LLMs #Andrej Karpathy #Tool Use #LLM","yK5yNNM8DTh_cOYOEgIm9Q","article_res/cover/5467e5c1727674928e485404ba7c640d.jpeg","article_res/cover/d568c185751166507743b4f1353ec06a.jpeg","The spelling challenges and uneven intelligence of LLMs? Andrej Karpathy's in-depth explanation of LLMs (Part 7)","模型在拼写方面存在困难","\u003Cdiv class=\"rich_media_content js_underline_content\n                       autoTypeSetting24psection\n            \" id=\"js_content\">\u003Csection mpa-from-tpl=\"t\" data-mpa-action-id=\"m7eh8k1j1k9v\">\u003Cspan leaf=\"\">\u003Cbr>\u003C/span>\u003C/section>\u003Csection data-mpa-template=\"t\" mpa-from-tpl=\"t\" data-mpa-action-id=\"m7eh8k1j1vm1\">\u003Csection style=\"text-align: center;transform: translate3d(2px, 0px, 0px);margin: 10px 0%;padding:0 10px;\" mpa-from-tpl=\"t\">\u003Csection style=\"display: inline-block;min-width: 10%;vertical-align: top;background-color: rgb(255, 202, 0);\" mpa-from-tpl=\"t\">\u003Csection style=\"margin: 5px 0% -5px;transform: translate3d(-5px, 0px, 0px);\" mpa-from-tpl=\"t\">\u003Csection style=\"display: inline-block;min-width: 10%;vertical-align: top;border-style: solid;border-width: 8px 1px 1px;border-radius: 0px;border-color: rgb(62, 62, 62);background-color: rgb(255, 255, 255);padding-right: 8px;padding-left: 8px;\" mpa-from-tpl=\"t\">\u003Csection mpa-from-tpl=\"t\">\u003Csection style=\"text-align: left;padding-right: 8px;padding-left: 8px;\" mpa-from-tpl=\"t\">\u003Cp>\u003Cspan leaf=\"\">Spelling Challenges\u003C/span>\u003C/p>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003Cp style='box-sizing: border-box;margin: 
0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">When exploring the capabilities of Large Language Models (LLMs), we often marvel at their impressive performance in mathematics, logical reasoning, and even writing. However, in \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">spelling-related tasks\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">, LLMs tend to perform less well. 
This is because\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">the world of models is built on \"tokens\" rather than individual characters.\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">This will affect its ability to handle spelling tasks.\u003C/span>\u003C/p>\u003Ch2 style='box-sizing: border-box;margin: 30px 0px 15px;color: rgba(0, 0, 0, 0.85);font-weight: 500;cursor: pointer;padding: 0px;display: block;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan style=\"box-sizing: border-box;cursor: pointer;font-size: 22px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;text-align: left;font-weight: bold;display: block;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">1. 
Why are LLMs not good at spelling?\u003C/span>\u003C/strong>\u003C/span>\u003C/h2>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">LLMs \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">do not see characters directly like humans do.\u003C/span>\u003C/strong>\u003Cspan leaf=\"\"> Their input is \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">tokens\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">, that is, the text is broken down into \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">larger 
text blocks\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">. The model learns token-level language structures during training rather than learning word spellings character by character.\u003C/span>\u003C/p>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">For example, the word \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">“ubiquitous”\u003C/span>\u003C/strong>\u003Cspan leaf=\"\"> may be split into \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">multiple tokens in the eyes of an LLM\u003C/span>\u003C/strong>\u003Cspan leaf=\"\"> instead of being read character by character:\u003C/span>\u003C/p>\u003Csection style=\"text-align: center;\" nodeleaf=\"\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-ratio=\"0.4462962962962963\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" 
type=\"block\" data-imgfileid=\"100010222\" style=\"height: auto !important;\" src=\"./assets/17423770652890.5581772514049488.png\">\u003C/section>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">Therefore, when we ask the model \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">to extract specific characters (such as selecting every third character)\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">, it will perform poorly because it does not directly \"see\" the individual letters of words but sees \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">concatenated tokens\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">.\u003C/span>\u003C/p>\u003Ch2 style='box-sizing: border-box;margin: 30px 0px 15px;color: rgba(0, 0, 0, 0.85);font-weight: 500;cursor: pointer;padding: 0px;display: block;font-family: 
Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan style=\"box-sizing: border-box;cursor: pointer;font-size: 22px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;text-align: left;font-weight: bold;display: block;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">2. Specific case: Failure in spelling tasks\u003C/span>\u003C/strong>\u003C/span>\u003C/h2>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">Suppose we give the LLM a task like this:\u003C/span>\u003C/p>\u003Cblockquote style='box-sizing: border-box;margin: 20px 0px;cursor: pointer;padding: 10px 10px 10px 20px;border-style: none none none solid;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;background: none 0% 0% / auto no-repeat 
scroll padding-box border-box rgba(0, 0, 0, 0.05);width: auto;height: auto;box-shadow: rgba(0, 0, 0, 0) 0px 0px 0px 0px;display: block;overflow: auto;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cp style=\"box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: 0em;text-align: left;text-indent: 0em;padding: 8px 0px;font-weight: normal;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Please print every third character of the word \"ubiquitous\" starting from the first letter.\u003C/span>\u003C/strong>\u003C/p>\u003C/blockquote>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">We expect to get:\u003C/span>\u003C/p>\u003Cpre 
style='box-sizing: border-box;font-size: 16px;font-family: SFMono-Regular, Consolas, \"Liberation Mono\", Menlo, Courier, monospace;margin: 10px 0px;overflow: auto;cursor: pointer;border-radius: 5px;box-shadow: rgba(0, 0, 0, 0.55) 0px 2px 10px;text-align: left;padding: 0px;color: rgb(0, 0, 0);font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;letter-spacing: normal;orphans: 2;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Ccode style=\"box-sizing: border-box;font-family: Consolas, Monaco, Menlo, monospace;font-size: 12px;display: -webkit-box;overflow-x: auto;padding: 15px 16px 16px;color: rgb(171, 178, 191);background: rgb(40, 44, 52);cursor: pointer;border-radius: 5px;\">\u003Cspan leaf=\"\">U Q T S\u003C/span>\u003Cspan leaf=\"\">\u003Cbr>\u003C/span>\u003C/code>\u003C/pre>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">However, the LLM's answer is usually wrong because it doesn't process tasks at the character level but rather operates based on tokens.\u003C/span>\u003C/p>\u003Csection style=\"text-align: center;\" nodeleaf=\"\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-ratio=\"0.3905325443786982\" data-s=\"300,640\" data-type=\"png\" 
data-w=\"1014\" type=\"block\" data-imgfileid=\"100010221\" style=\"height: auto !important;\" src=\"./assets/17423770653130.6270527670269754.png\">\u003C/section>\u003Ch3 style='box-sizing: border-box;margin: 30px 0px 15px;color: rgba(0, 0, 0, 0.85);font-weight: 500;cursor: pointer;padding: 0px;display: block;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan style=\"box-sizing: border-box;cursor: pointer;font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;text-align: left;font-weight: bold;display: block;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Why does it fail?\u003C/span>\u003C/strong>\u003C/span>\u003C/h3>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">✅ 
\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Humans see characters\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">and can easily extract the specified letters.\u003C/span>\u003Cspan leaf=\"\">\u003Cbr>\u003C/span>\u003Cspan leaf=\"\">❌ \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">LLMs see tokens\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">rather than individual characters, so they cannot accurately perform character-level operations.\u003C/span>\u003C/p>\u003Ch2 style='box-sizing: border-box;margin: 30px 0px 15px;color: rgba(0, 0, 0, 0.85);font-weight: 500;cursor: pointer;padding: 0px;display: block;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan style=\"box-sizing: border-box;cursor: pointer;font-size: 22px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;text-align: left;font-weight: bold;display: block;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto 
no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">3. Classic error case: Can LLMs not count \"R\"?\u003C/span>\u003C/strong>\u003C/span>\u003C/h2>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">A well-known LLM spelling issue is:\u003C/span>\u003C/p>\u003Cblockquote style='box-sizing: border-box;margin: 20px 0px;cursor: pointer;padding: 10px 10px 10px 20px;border-style: none none none solid;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0.05);width: auto;height: auto;box-shadow: rgba(0, 0, 0, 0) 0px 0px 0px 0px;display: block;overflow: auto;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cp style=\"box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 
1.8em;letter-spacing: 0em;text-align: left;text-indent: 0em;padding: 8px 0px;font-weight: normal;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">How many R's are in \"strawberry\"?\u003C/span>\u003C/strong>\u003C/p>\u003C/blockquote>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">✅ The correct answer is \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">3\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">.\u003C/span>\u003Cspan leaf=\"\">\u003Cbr>\u003C/span>\u003Cspan leaf=\"\">❌ Many early LLMs (including GPT-3) would incorrectly answer \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: 
none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">2\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">.\u003C/span>\u003C/p>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">This has triggered extensive discussions: \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Why can an AI that solves Olympiad math problems not correctly count letters?\u003C/span>\u003C/strong>\u003Cspan leaf=\"\"> There are two main reasons for this:\u003C/span>\u003C/p>\u003Col style='box-sizing: border-box;margin: 8px 0px;cursor: pointer;list-style-type: decimal;padding: 0px 0px 0px 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;' 
class=\"list-paddingleft-1\">\u003Cli style=\"box-sizing: border-box;cursor: pointer;\">\u003Csection style=\"box-sizing: border-box;cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);font-size: 16px;line-height: 1.8em;letter-spacing: 0em;text-align: left;font-weight: normal;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">LLMs see tokens, not characters.\u003C/span>\u003C/strong>\u003Cspan leaf=\"\"> Thus, they cannot process spelling tasks letter by letter.\u003C/span>\u003C/section>\u003C/li>\u003Cli style=\"box-sizing: border-box;cursor: pointer;\">\u003Csection style=\"box-sizing: border-box;cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);font-size: 16px;line-height: 1.8em;letter-spacing: 0em;text-align: left;font-weight: normal;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">LLMs are inherently not good at counting\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">, which makes it even more difficult to count characters.\u003C/span>\u003C/section>\u003C/li>\u003C/ol>\u003Ch2 style='box-sizing: border-box;margin: 30px 0px 15px;color: rgba(0, 0, 0, 0.85);font-weight: 500;cursor: pointer;padding: 0px;display: block;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 
0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan style=\"box-sizing: border-box;cursor: pointer;font-size: 22px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;text-align: left;font-weight: bold;display: block;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">4. How to make LLMs perform spelling tasks correctly?\u003C/span>\u003C/strong>\u003C/span>\u003C/h2>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">Since LLMs\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">cannot reliably handle character-level tasks\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">, we can\u003C/span>\u003Cstrong 
style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">leverage the code interpreter (Code Interpreter)\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">to solve this problem. For example, we could have the model invoke Python:\u003C/span>\u003C/p>\u003Ch3 style='box-sizing: border-box;margin: 30px 0px 15px;color: rgba(0, 0, 0, 0.85);font-weight: 500;cursor: pointer;padding: 0px;display: block;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan style=\"box-sizing: border-box;cursor: pointer;font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;text-align: left;font-weight: bold;display: block;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Example: Using Python to solve problems\u003C/span>\u003C/strong>\u003C/span>\u003C/h3>\u003Csection style=\"text-align: center;\" nodeleaf=\"\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100010223\" data-ratio=\"0.9027777777777778\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" type=\"block\" 
style=\"height: auto !important;\" src=\"./assets/17423770653010.4858366828345302.png\">\u003C/section>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">We can ask the LLM:\u003C/span>\u003C/p>\u003Cpre style='box-sizing: border-box;font-size: 16px;font-family: SFMono-Regular, Consolas, \"Liberation Mono\", Menlo, Courier, monospace;margin: 10px 0px;overflow: auto;cursor: pointer;border-radius: 5px;box-shadow: rgba(0, 0, 0, 0.55) 0px 2px 10px;text-align: left;padding: 0px;color: rgb(0, 0, 0);font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;letter-spacing: normal;orphans: 2;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Ccode style=\"box-sizing: border-box;font-family: Consolas, Monaco, Menlo, monospace;font-size: 12px;display: -webkit-box;overflow-x: auto;padding: 15px 16px 16px;color: rgb(171, 178, 191);background: rgb(40, 44, 52);cursor: pointer;border-radius: 5px;\">\u003Cspan leaf=\"\">Please use Python code to extract every third character from the 
string \u003C/span>\u003Cspan style=\"box-sizing: border-box;color: rgb(152, 195, 121);cursor: pointer;line-height: 26px;\">\u003Cspan leaf=\"\">\"ubiquitous\"\u003C/span>\u003C/span>\u003Cspan leaf=\"\">.\u003C/span>\u003Cspan leaf=\"\">\u003Cbr>\u003C/span>\u003C/code>\u003C/pre>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Python code generated by LLM:\u003C/span>\u003C/strong>\u003C/p>\u003Cpre style='box-sizing: border-box;font-size: 16px;font-family: SFMono-Regular, Consolas, \"Liberation Mono\", Menlo, Courier, monospace;margin: 10px 0px;overflow: auto;cursor: pointer;border-radius: 5px;box-shadow: rgba(0, 0, 0, 0.55) 0px 2px 10px;text-align: left;padding: 0px;color: rgb(0, 0, 0);font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;letter-spacing: normal;orphans: 2;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Ccode style=\"box-sizing: border-box;font-family: Consolas, Monaco, Menlo, monospace;font-size: 12px;display: -webkit-box;overflow-x: auto;padding: 15px 16px 16px;color: rgb(171, 178, 191);background: rgb(40, 44, 52);cursor: 
pointer;border-radius: 5px;\">\u003Cspan leaf=\"\">word = \u003C/span>\u003Cspan style=\"box-sizing: border-box;color: rgb(152, 195, 121);cursor: pointer;line-height: 26px;\">\u003Cspan leaf=\"\">\"ubiquitous\"\u003C/span>\u003C/span>\u003Cspan leaf=\"\">\u003Cbr>\u003C/span>\u003Cspan leaf=\"\">result = word[::\u003C/span>\u003Cspan style=\"box-sizing: border-box;color: rgb(209, 154, 102);cursor: pointer;line-height: 26px;\">\u003Cspan leaf=\"\">3\u003C/span>\u003C/span>\u003Cspan leaf=\"\">]\u003C/span>\u003Cspan leaf=\"\">\u003Cbr>\u003C/span>\u003Cspan leaf=\"\">print(result)\u003C/span>\u003Cspan leaf=\"\">\u003Cbr>\u003C/span>\u003C/code>\u003C/pre>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Output result:\u003C/span>\u003C/strong>\u003C/p>\u003Cpre style='box-sizing: border-box;font-size: 16px;font-family: SFMono-Regular, Consolas, \"Liberation Mono\", Menlo, Courier, monospace;margin: 10px 0px;overflow: auto;cursor: pointer;border-radius: 5px;box-shadow: rgba(0, 0, 0, 0.55) 0px 2px 10px;text-align: left;padding: 0px;color: rgb(0, 0, 0);font-style: 
normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;letter-spacing: normal;orphans: 2;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Ccode style=\"box-sizing: border-box;font-family: Consolas, Monaco, Menlo, monospace;font-size: 12px;display: -webkit-box;overflow-x: auto;padding: 15px 16px 16px;color: rgb(171, 178, 191);background: rgb(40, 44, 52);cursor: pointer;border-radius: 5px;\">\u003Cspan leaf=\"\">uqts\u003C/span>\u003Cspan leaf=\"\">\u003Cbr>\u003C/span>\u003C/code>\u003C/pre>\u003Ch3 style='box-sizing: border-box;margin: 30px 0px 15px;color: rgba(0, 0, 0, 0.85);font-weight: 500;cursor: pointer;padding: 0px;display: block;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan style=\"box-sizing: border-box;cursor: pointer;font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;text-align: left;font-weight: bold;display: block;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Why is the code more reliable than the LLM itself?\u003C/span>\u003C/strong>\u003C/span>\u003C/h3>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: 
pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">✅ \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Python code can manipulate on a per-character basis\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">while LLMs operate on tokens and cannot directly access individual characters.\u003C/span>\u003Cspan leaf=\"\">\u003Cbr>\u003C/span>\u003Cspan leaf=\"\">✅ \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">With the help of code tools, AI can accurately perform character-level tasks\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">without making mistakes due to tokenization issues.\u003C/span>\u003C/p>\u003Csection mpa-from-tpl=\"t\" data-mpa-action-id=\"m7eh88vm1w21\">\u003Cspan leaf=\"\">\u003Cbr>\u003C/span>\u003C/section>\u003Csection data-mpa-template=\"t\" mpa-from-tpl=\"t\" data-mpa-action-id=\"m7eh88vm1q7e\">\u003Csection style=\"text-align: 
center;transform: translate3d(2px, 0px, 0px);margin: 10px 0%;padding:0 10px;\" mpa-from-tpl=\"t\">\u003Csection style=\"display: inline-block;min-width: 10%;vertical-align: top;background-color: rgb(255, 202, 0);\" mpa-from-tpl=\"t\">\u003Csection style=\"margin: 5px 0% -5px;transform: translate3d(-5px, 0px, 0px);\" mpa-from-tpl=\"t\">\u003Csection style=\"display: inline-block;min-width: 10%;vertical-align: top;border-style: solid;border-width: 8px 1px 1px;border-radius: 0px;border-color: rgb(62, 62, 62);background-color: rgb(255, 255, 255);padding-right: 8px;padding-left: 8px;\" mpa-from-tpl=\"t\">\u003Csection mpa-from-tpl=\"t\">\u003Csection style=\"text-align: left;padding-right: 8px;padding-left: 8px;\" mpa-from-tpl=\"t\">\u003Cp>\u003Cem>\u003Cstrong mpa-from-tpl=\"t\">\u003Cspan leaf=\"\">Imbalanced intelligence\u003C/span>\u003C/strong>\u003C/em>\u003C/p>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003Csection>\u003Cspan leaf=\"\">AI \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">can solve Olympiad-level math problems and answer PhD-level physics, chemistry, and biology questions, but it stumbles repeatedly on the most basic questions.\u003C/span>\u003C/strong>\u003Cspan leaf=\"\"> This is precisely a typical manifestation of \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">AI's jagged 
intelligence\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">.\u003C/span>\u003C/section>\u003Ch2 style='box-sizing: border-box;margin: 30px 0px 15px;color: rgba(0, 0, 0, 0.85);font-weight: 500;cursor: pointer;padding: 0px;display: block;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan style=\"box-sizing: border-box;cursor: pointer;font-size: 22px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;text-align: left;font-weight: bold;display: block;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">1. 
AI's \"unreasonable\" mistakes on simple problems\u003C/span>\u003C/strong>\u003C/span>\u003C/h2>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">Let's look at a confusing case:\u003C/span>\u003C/p>\u003Cblockquote style='box-sizing: border-box;margin: 20px 0px;cursor: pointer;padding: 10px 10px 10px 20px;border-style: none none none solid;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0.05);width: auto;height: auto;box-shadow: rgba(0, 0, 0, 0) 0px 0px 0px 0px;display: block;overflow: auto;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cp style=\"box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: 0em;text-align: left;text-indent: 0em;padding: 8px 0px;font-weight: normal;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat 
scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Is 9.11 greater than 9.9?\u003C/span>\u003C/strong>\u003C/p>\u003C/blockquote>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">✅ The correct answer should be \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">No (9.9 &gt; 9.11)\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">, but an LLM might give the following incorrect answer:\u003C/span>\u003C/p>\u003Cpre style='box-sizing: border-box;font-size: 16px;font-family: SFMono-Regular, Consolas, \"Liberation Mono\", Menlo, Courier, monospace;margin: 10px 0px;overflow: auto;cursor: pointer;border-radius: 5px;box-shadow: rgba(0, 0, 0, 0.55) 0px 2px 10px;text-align: left;padding: 0px;color: rgb(0, 0, 0);font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;letter-spacing: normal;orphans: 2;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;background-color: rgb(255, 
255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Ccode style=\"box-sizing: border-box;font-family: Consolas, Monaco, Menlo, monospace;font-size: 12px;display: -webkit-box;overflow-x: auto;padding: 15px 16px 16px;color: rgb(171, 178, 191);background: rgb(40, 44, 52);cursor: pointer;border-radius: 5px;\">\u003Cspan leaf=\"\">9.11 is greater than 9.9.\u003C/span>\u003Cspan leaf=\"\">\u003Cbr>\u003C/span>\u003C/code>\u003C/pre>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">It may even \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">attempt to use mathematical logic to explain a wrong conclusion\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">, or, after multiple inquiries, \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">be 
inconsistent, sometimes correct and sometimes wrong\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">.\u003C/span>\u003C/p>\u003Csection style=\"text-align: center;\" nodeleaf=\"\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-ratio=\"0.4980392156862745\" data-s=\"300,640\" data-type=\"png\" data-w=\"1020\" type=\"block\" data-imgfileid=\"100010227\" style=\"height: auto !important;\" src=\"./assets/17423770653090.028424495071314393.png\">\u003C/section>\u003Csection style=\"text-align: center;\" nodeleaf=\"\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100010228\" data-ratio=\"0.576171875\" data-s=\"300,640\" data-type=\"png\" data-w=\"1024\" type=\"block\" style=\"height: auto !important;\" src=\"./assets/17423770654130.41774532813244036.png\">\u003C/section>\u003Ch2 style='box-sizing: border-box;margin: 30px 0px 15px;color: rgba(0, 0, 0, 0.85);font-weight: 500;cursor: pointer;padding: 0px;display: block;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan style=\"box-sizing: border-box;cursor: pointer;font-size: 22px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;text-align: left;font-weight: bold;display: block;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">2. 
Why is AI better at handling complex problems but prone to errors on simple questions?\u003C/span>\u003C/strong>\u003C/span>\u003C/h2>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">In some cases, AI can even solve\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Olympiad-level math problems\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">, but fails at\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">simple numerical comparisons\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">Making mistakes above seems completely unreasonable. 
But after further research, the researchers discovered some astonishing phenomena.\u003C/span>\u003C/p>\u003Ch3 style='box-sizing: border-box;margin: 30px 0px 15px;color: rgba(0, 0, 0, 0.85);font-weight: 500;cursor: pointer;padding: 0px;display: block;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan style=\"box-sizing: border-box;cursor: pointer;font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;text-align: left;font-weight: bold;display: block;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">(1) The model may be disturbed by \"non-mathematical factors\"\u003C/span>\u003C/strong>\u003C/span>\u003C/h3>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">The research team found that 
when the model was\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">calculating the size relationship between 9.11 and 9.9\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">，\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">The activated neurons were highly similar to text patterns related to the Bible.\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">。\u003C/span>\u003C/p>\u003Cul style='box-sizing: border-box;margin: 8px 0px;cursor: pointer;list-style-type: disc;padding: 0px 0px 0px 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;' class=\"list-paddingleft-1\">\u003Cli style=\"box-sizing: border-box;cursor: pointer;\">\u003Csection style=\"box-sizing: border-box;cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);font-size: 16px;line-height: 1.8em;letter-spacing: 0em;text-align: left;font-weight: normal;\">\u003Cspan leaf=\"\">In the Bible,\u003C/span>\u003Cstrong style=\"box-sizing: 
border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">chapter numbers\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">are usually written as\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">9:11 (such as John 9:11)\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">。\u003C/span>\u003C/section>\u003C/li>\u003Cli style=\"box-sizing: border-box;cursor: pointer;\">\u003Csection style=\"box-sizing: border-box;cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);font-size: 16px;line-height: 1.8em;letter-spacing: 0em;text-align: left;font-weight: normal;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">In this format, 9:11 does indeed appear after 9:9.\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">。\u003C/span>\u003C/section>\u003C/li>\u003Cli style=\"box-sizing: border-box;cursor: pointer;\">\u003Csection style=\"box-sizing: border-box;cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);font-size: 16px;line-height: 1.8em;letter-spacing: 0em;text-align: left;font-weight: normal;\">\u003Cspan leaf=\"\">Therefore,\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: 
bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">the model's memory might be misled\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">into thinking that 9.11 is greater than 9.9, rather than performing a numerical comparison according to mathematical logic.\u003C/span>\u003C/section>\u003C/li>\u003C/ul>\u003Ch3 style='box-sizing: border-box;margin: 30px 0px 15px;color: rgba(0, 0, 0, 0.85);font-weight: 500;cursor: pointer;padding: 0px;display: block;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan style=\"box-sizing: border-box;cursor: pointer;font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;text-align: left;font-weight: bold;display: block;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">(2) Statistical patterns of LLM vs. 
Logical reasoning\u003C/span>\u003C/strong>\u003C/span>\u003C/h3>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">LLM \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Mainly relies on statistical pattern matching rather than true logical reasoning\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">：\u003C/span>\u003C/p>\u003Cul style='box-sizing: border-box;margin: 8px 0px;cursor: pointer;list-style-type: disc;padding: 0px 0px 0px 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;' class=\"list-paddingleft-1\">\u003Cli style=\"box-sizing: border-box;cursor: pointer;\">\u003Csection style=\"box-sizing: border-box;cursor: pointer;margin-top: 
5px;margin-bottom: 5px;color: rgb(1, 1, 1);font-size: 16px;line-height: 1.8em;letter-spacing: 0em;text-align: left;font-weight: normal;\">\u003Cspan leaf=\"\">When the model sees\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">“9.11 vs 9.9”\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">，it might incorrectly associate it with\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">text patterns related to Bible verse numbers\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">rather than performing mathematical operations.\u003C/span>\u003C/section>\u003C/li>\u003Cli style=\"box-sizing: border-box;cursor: pointer;\">\u003Csection style=\"box-sizing: border-box;cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);font-size: 16px;line-height: 1.8em;letter-spacing: 0em;text-align: left;font-weight: normal;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">The model cannot distinguish between mathematical context and text patterns\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">, leading it to make non-mathematical 
decisions.\u003C/span>\u003C/section>\u003C/li>\u003C/ul>\u003Ch3 style='box-sizing: border-box;margin: 30px 0px 15px;color: rgba(0, 0, 0, 0.85);font-weight: 500;cursor: pointer;padding: 0px;display: block;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan style=\"box-sizing: border-box;cursor: pointer;font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;text-align: left;font-weight: bold;display: block;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">(3) LLM is not a perfect mathematical tool\u003C/span>\u003C/strong>\u003C/span>\u003C/h3>\u003Cul style='box-sizing: border-box;margin: 8px 0px;cursor: pointer;list-style-type: disc;padding: 0px 0px 0px 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;' class=\"list-paddingleft-1\">\u003Cli style=\"box-sizing: border-box;cursor: pointer;\">\u003Csection 
style=\"box-sizing: border-box;cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);font-size: 16px;line-height: 1.8em;letter-spacing: 0em;text-align: left;font-weight: normal;\">\u003Cspan leaf=\"\">The nature of language models is\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">to predict the next most likely token\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">but not strictly enforcing mathematical rules.\u003C/span>\u003C/section>\u003C/li>\u003Cli style=\"box-sizing: border-box;cursor: pointer;\">\u003Csection style=\"box-sizing: border-box;cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);font-size: 16px;line-height: 1.8em;letter-spacing: 0em;text-align: left;font-weight: normal;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Complex math problems\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">usually rely on more systematic reasoning steps, while\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">simple calculations\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">are more prone to being affected 
by\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Noise interference in training data\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">。\u003C/span>\u003C/section>\u003C/li>\u003C/ul>\u003Ch2 style='box-sizing: border-box;margin: 30px 0px 15px;color: rgba(0, 0, 0, 0.85);font-weight: 500;cursor: pointer;padding: 0px;display: block;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan style=\"box-sizing: border-box;cursor: pointer;font-size: 22px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;text-align: left;font-weight: bold;display: block;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">3. 
Solution: How to improve the computational reliability of LLMs?\u003C/span>\u003C/strong>\u003C/span>\u003C/h2>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">✅ \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Do not fully trust AI's mathematical reasoning ability\u003C/span>\u003C/strong>\u003C/p>\u003Cul style='box-sizing: border-box;margin: 8px 0px;cursor: pointer;list-style-type: disc;padding: 0px 0px 0px 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;' class=\"list-paddingleft-1\">\u003Cli style=\"box-sizing: border-box;cursor: pointer;\">\u003Csection style=\"box-sizing: border-box;cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: 
rgb(1, 1, 1);font-size: 16px;line-height: 1.8em;letter-spacing: 0em;text-align: left;font-weight: normal;\">\u003Cspan leaf=\"\">Due to LLM\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">may be affected by text pattern interference\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">Its calculation result requires\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">additional verification\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">。\u003C/span>\u003C/section>\u003C/li>\u003Cli style=\"box-sizing: border-box;cursor: pointer;\">\u003Csection style=\"box-sizing: border-box;cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);font-size: 16px;line-height: 1.8em;letter-spacing: 0em;text-align: left;font-weight: normal;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Suggestion: Use external calculation tools (such as Python code interpreters) for validation\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">instead of directly relying on AI's \"mental calculation.\"\u003C/span>\u003C/section>\u003C/li>\u003C/ul>\u003Cp style='box-sizing: border-box;margin: 
0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">✅ \u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Treat AI as a tool, not the final answer\u003C/span>\u003C/strong>\u003C/p>\u003Cul style='box-sizing: border-box;margin: 8px 0px;cursor: pointer;list-style-type: disc;padding: 0px 0px 0px 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;' class=\"list-paddingleft-1\">\u003Cli style=\"box-sizing: border-box;cursor: pointer;\">\u003Csection style=\"box-sizing: border-box;cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);font-size: 16px;line-height: 1.8em;letter-spacing: 0em;text-align: left;font-weight: normal;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: 
bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">AI is a \"probabilistic system,\" not an absolutely correct reasoning machine.\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">。\u003C/span>\u003C/section>\u003C/li>\u003Cli style=\"box-sizing: border-box;cursor: pointer;\">\u003Csection style=\"box-sizing: border-box;cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);font-size: 16px;line-height: 1.8em;letter-spacing: 0em;text-align: left;font-weight: normal;\">\u003Cspan leaf=\"\">When using AI to solve problems,\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">we should not blindly copy the answers generated by AI\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">but should\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">verify in combination with context and tools.\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">。\u003C/span>\u003C/section>\u003C/li>\u003C/ul>\u003Cp style=\"display: none;\">\u003Cmp-style-type 
data-value=\"3\">\u003C/mp-style-type>\u003C/p>\u003C/div>",[257,266,275,282,290,298,305,313],{"id":258,"title_md5":259,"publish_date":260,"author_md5":261,"is_original":4,"collection":262,"summary_md5":263,"cover_url":264,"cover_url_1_1":265},456,"e0bbc6133bd79cb5f37523c9d93549e8","2023-05-17","aaa8ca17ae2e86c309dea80b10f6e68d","#Prompt Engineering","ae29edcd094bc79e8fbda074bb9b7172","article_res/cover/4e52f03409b84713991814f06fd6424d.jpeg","article_res/cover/3a58bf9a788ec37b5d5275f0755c1791.jpeg",{"id":267,"title_md5":268,"publish_date":269,"author_md5":270,"is_original":23,"collection":271,"summary_md5":272,"cover_url":273,"cover_url_1_1":274},312,"917d36c6e00736d191da3effea7a3f7b","2024-03-18","bc27fa490c4d0d525bac812fc0793534","#AI Image Generator","009f9c04790dac9363ac9af4ff0b056d","article_res/cover/fe1d3743e95dbd4eea738b6cf9498981.jpeg","article_res/cover/3ff330b3318fd44092bfae9bb8f93cd3.jpeg",{"id":276,"title_md5":277,"publish_date":278,"author_md5":270,"is_original":4,"collection":5,"summary_md5":279,"cover_url":280,"cover_url_1_1":281},378,"ceaff6047cc5a26e5f5c5d66c1c19fe6","2023-11-27","eaa886461c7fc12f94420857876d4a73","article_res/cover/a6e956c7192e834c23b9791f362717b6.jpeg","article_res/cover/fb444ef951eb25829e83c372cb080eb2.jpeg",{"id":283,"title_md5":284,"publish_date":285,"author_md5":270,"is_original":23,"collection":286,"summary_md5":287,"cover_url":288,"cover_url_1_1":289},65,"355241d059bad6536d0bb9bea822f270","2025-01-20","#AI Image Generator #AI Video Generator #Krea #AI 3D Generator","29bfdafcda8f22bb9177d697aa2e5446","article_res/cover/aa9dbc8db793695f3f24fcf41afc1fee.jpeg","article_res/cover/512cc57558a4f8a8eff6a2acbdb816a3.jpeg",{"id":291,"title_md5":292,"publish_date":293,"author_md5":270,"is_original":23,"collection":294,"summary_md5":295,"cover_url":296,"cover_url_1_1":297},346,"75ddb270704d1a571ae8302b59ef90c2","2024-01-10","#Meta #AI 
Avatar","8fea49e35437eb7621d9caf4a5760766","article_res/cover/d61bf1e67c7a47df3eba82ad12f33a59.jpeg","article_res/cover/13a49df63a291d41c384c18329d4099c.jpeg",{"id":299,"title_md5":300,"publish_date":301,"author_md5":270,"is_original":23,"collection":5,"summary_md5":302,"cover_url":303,"cover_url_1_1":304},431,"986556a654bb6a34670e283852f6564d","2023-07-21","683fb95c3f129eefbfbba7b84be1013b","article_res/cover/f6fed272015b213f17cb83a239e8c201.jpeg","article_res/cover/ebaf192d04e18300b63317e6845a1f62.jpeg",{"id":306,"title_md5":307,"publish_date":308,"author_md5":270,"is_original":4,"collection":309,"summary_md5":310,"cover_url":311,"cover_url_1_1":312},385,"86e3808158857aa9cb32fd04995bf4ce","2023-10-26","#Stable Diffusion #AI Animation","d2cb1645163307559107cd8ae6ceb914","article_res/cover/daf53481b94fc0c5e57c8105b63673da.jpeg","article_res/cover/a0344c987723c4015380d16b83234750.jpeg",{"id":55,"title_md5":314,"publish_date":56,"author_md5":270,"is_original":4,"collection":57,"summary_md5":315,"cover_url":58,"cover_url_1_1":59},"28b69e9647ff03a4fbe0c2b36af24af2","f8d7b46c6a25b038af4d085fa1bc04f7",{"related":317,"small":358},[318,326,333,342,350],{"id":319,"publish_date":320,"is_original":23,"collection":321,"cover_url":322,"cover_url_1_1":323,"title":324,"summary":325,"author":28},163,"2024-10-01","#Meta #LLM","article_res/cover/cff77b1a39cc810ec45fe2d7ee6d3147.jpeg","article_res/cover/d02e205c34dd3ead44aa034048aadea9.jpeg","Meta's latest LLM release: Llama 3.2 Lightweight and Multimodal.","The open-source AI model that you can fine-tune, distill, and deploy anywhere is now available in more versions.",{"id":327,"publish_date":328,"is_original":23,"collection":5,"cover_url":329,"cover_url_1_1":330,"title":331,"summary":332,"author":28},197,"2024-08-14","article_res/cover/22174743c1501f73c6b18fe0c76eaba5.jpeg","article_res/cover/a4056722ad71dc449138929b900a70fd.jpeg","MiniCPM, a GPT-4V-level multimodal language model running on mobile devices","Today I studied a 
project called MiniCPM, which is a GPT-4V-level multimodal language model (MLL) running on mobile phones.",{"id":334,"publish_date":335,"is_original":4,"collection":336,"cover_url":337,"cover_url_1_1":338,"title":339,"summary":340,"author":341},491,"2023-04-03","#Stable Diffusion #AI Image Generator","article_res/cover/cc9aebb6229264492bef0fbaf83d1648.jpeg","article_res/cover/4d6ecc3edcc7ed5f6d919b9ee8f09473.jpeg","【AIGC Learning】Stable Diffusion web UI How-to Series - 4 X/Y/Z Chart Usage","This copilot will fall between useful and essential to most professionals and many other sorts of workers - Reid Hoffman","AIGC Learning Notes",{"id":343,"publish_date":344,"is_original":4,"collection":5,"cover_url":345,"cover_url_1_1":346,"title":347,"summary":348,"author":349},581,"2022-04-05","article_res/cover/884cac974c8492a64ba5b91608138495.jpeg","article_res/cover/f0f5a3661a42fda979323d57f0fe697a.jpeg","Talent Inventory - Expanding Capabilities","There is grandeur in this view of life, with its several powers, having been breathed into a few forms or into one.","Management Notes",{"id":351,"publish_date":352,"is_original":23,"collection":353,"cover_url":354,"cover_url_1_1":355,"title":356,"summary":357,"author":28},50,"2025-02-03","#OpenAI #LLM #AI Agent #AI Research #RL","article_res/cover/01d12853914b4549d0556b990ccbfb1b.jpeg","article_res/cover/7260f24c0367b431ad9245c97db8d4c6.jpeg","OpenAI Deep Research: Intelligent research assistant launched","An agent that uses reasoning to synthesize large amounts of online information and complete multi-step research 
tasks.",[359,365,371],{"title":10,"list":360},[361,362,363,364],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":366},[367,368,369,370],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},{"title":242,"list":372},[],[8,9,10],[8,12,13,14,9,10,15,16,17,18],["Reactive",245],1754646417756]