[{"data":1,"prerenderedAt":374},["ShallowReactive",2],{"$fgukOamtKU1RtUiMFsqdObttmqPPQz0uc7bl_gj_LyX0":3,"$fgwcq9X132foMTd83KDgoOQFt83ttgpz9G_E41V_fa2Q":245,"article-70":373},{"code":4,"msg":5,"data":6},0,"",{"category":7,"tag":11,"popular":19,"latest":86,"banner":126,"list":151,"cache":244},[8,9,10],"Agent","OpenAI","LLM",[8,12,13,14,9,10,15,16,17,18],"Google","Nvidia","Claude","DeepSeek","OCR","Chat","Generator",[20,29,37,45,54,62,70,79],{"id":21,"publish_date":22,"is_original":23,"collection":5,"cover_url":24,"cover_url_1_1":25,"title":26,"summary":27,"author":28},411,"2023-09-10",1,"article_res/cover/451ef50c225a8dc61c4336506794d13b.jpeg","article_res/cover/3ba9dc7a72f87d40b20fc2d225289ee3.jpeg","Idealism","Reality is created by the mind, we can change our reality by changing our mind. - Plato","Renee's Entrepreneurial Journey",{"id":30,"publish_date":31,"is_original":23,"collection":32,"cover_url":33,"cover_url_1_1":34,"title":35,"summary":36,"author":28},108,"2024-12-07","#LLM #AGI #AI Agent","article_res/cover/0039044422e4ec9f61c18e8ee1693bb0.jpeg","article_res/cover/4220971b108a91d21407d87bb02fbaa6.jpeg","Freysa.ai: The World's First Adversarial AI Agent Game","说服 Freysa 把钱包里的钱都拿出来",{"id":38,"publish_date":39,"is_original":23,"collection":40,"cover_url":41,"cover_url_1_1":42,"title":43,"summary":44,"author":28},12,"2025-03-09","#Oxford #Reasoning #LLM #Tool Use","article_res/cover/d448e9b3617a0b5302e1bd10c438bca9.jpeg","article_res/cover/864a468f9cc4c9317efadb3811909888.jpeg","Agentic Reasoning Framework - Significantly enhance the reasoning ability of LLMs through the integration of external tools using agents","Agentic Reasoning: Reasoning LLMs with Tools for Deep Research",{"id":46,"publish_date":47,"is_original":4,"collection":48,"cover_url":49,"cover_url_1_1":50,"title":51,"summary":52,"author":53},480,"2023-04-14","#Stable 
Diffusion","article_res/cover/0bdbe7cb1de4a78e54536e5d9afa7ec9.jpeg","article_res/cover/b3d6ffec0608dcfaf18c5a69906d1490.jpeg","【AIGC Learning】Generate Prompts Using Word Graphs - Stable Diffusion Web UI Series 13","AI will become a powerful tool in education, transforming the way we learn and deliver instruction.  \n- Reid Hoffman","--",{"id":55,"publish_date":56,"is_original":4,"collection":57,"cover_url":58,"cover_url_1_1":59,"title":60,"summary":61,"author":28},413,"2023-09-08","#Neuroscience","article_res/cover/74f8302d78a23d9430f22171eae136b6.jpeg","article_res/cover/87ca08af81bb304746be5261160964c0.jpeg","Can machines be conscious?","Do we have an ethical obligation to not turn off conscious machines? Would turning them off be murder? No. I don't lose any sleep over unplugging a conscious machine.\n- Jeff Hawkins, \"A Thousand Brains\"",{"id":63,"publish_date":64,"is_original":23,"collection":65,"cover_url":66,"cover_url_1_1":67,"title":68,"summary":69,"author":28},178,"2024-09-09","#Entrepreneurship","article_res/cover/a7224f025b55d1820408085faef63079.jpeg","article_res/cover/11a9995b096cbf64465ef01b8673b154.jpeg","37signals company","This damn sense of relaxation",{"id":71,"publish_date":72,"is_original":4,"collection":73,"cover_url":74,"cover_url_1_1":75,"title":76,"summary":77,"author":78},460,"2023-05-12","#Google","article_res/cover/b970687b12faa52da976f91248c2aa7b.jpeg","article_res/cover/d1e71b52cfd2c63bc6e71f3e85ff135c.jpeg","Learn what BRC-20 and Ordinals are using Google Bard","Ordinals - a new protocol that allows users to store arbitrary data on the Bitcoin blockchain","Google Bard mainly writes",{"id":80,"publish_date":81,"is_original":23,"collection":5,"cover_url":82,"cover_url_1_1":83,"title":84,"summary":85,"author":28},309,"2024-03-26","article_res/cover/9877f95894ee88532d0e6012c23a2df3.jpeg","article_res/cover/20092164ddc109ce6ae56b1984246751.jpeg","Learning the Cancun Upgrade with lepton and perplexity","Building a quick 
conversation-based search demo with Lepton AI.",[87,95,103,111,119],{"id":88,"publish_date":89,"is_original":23,"collection":90,"cover_url":91,"cover_url_1_1":92,"title":93,"summary":94,"author":28},627,"2025-03-20","#AI Avatar #AI Video Generation","article_res/cover/d95481358f73924989f8c4ee9c75d1c8.jpeg","article_res/cover/b74bc0fab01f8b6a6aa87696c0c3ed8b.jpeg","DisPose: Generating Animated Videos by Driving Video with Reference Images","DisPose is a controllable human image animation method that enhances video generation.",{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},626,"2025-03-21","#Deep Dive into LLMs #LLM #RL #Andrej Karpathy #AlphaGo","article_res/cover/446553a5c8f8f2f07d97b20eaee84e56.jpeg","article_res/cover/e6c2823409c9b34624064b9acbaca6f1.jpeg","AlphaGo and the Power of Reinforcement Learning - Andrej Karpathy's Deep Dive on LLMs (Part 9)","Simply learning from humans will never surpass human capabilities.",{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},625,"2025-03-22","#Deep Dive into LLMs #LLM #RL #RLHF #Andrej Karpathy","article_res/cover/8da81d38b1e5cf558a164710fd8a5389.jpeg","article_res/cover/96f028d76c362a99a0dd56389e8f7a9b.jpeg","Reinforcement Learning from Human Feedback (RLHF) - Andrej Karpathy's Deep Dive on LLMs (Part 10)","Fine-Tuning Language Models from Human Preferences",{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},624,"2025-03-23","#Deep Dive into LLMs #LLM #Andrej Karpathy #AI Agent #MMM","article_res/cover/a5e7c3d48bb09109684d6513287c661d.jpeg","article_res/cover/d3f22b7c0ab8d82fd2da457a299e0773.jpeg","The Future of Large Language Models - Andrej Karpathy's In-Depth Explanation of LLM (Part 11)","preview of things to 
come",{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},623,"#Google #Voe #AI Video Generation","article_res/cover/c44062fea0f336c2b96b3928292392c2.jpeg","article_res/cover/a041041c69092ad3db191c5bf3ff981b.jpeg","Trial of Google's video generation model VOE2","Our state-of-the-art video generation model",[127,135,143],{"id":128,"publish_date":129,"is_original":23,"collection":130,"cover_url":131,"cover_url_1_1":132,"title":133,"summary":134,"author":28},300,"2024-04-16","#AI in Science #AGI","article_res/cover/6bf01e793e0f33e848572412eebdf9b0.jpeg","article_res/cover/91a5ee21dafecb914fabeb9430d46ec1.jpeg","Would Einstein lose his job - AI and Quantum Computing: A Glimpse into the Near Future","So Einstein's job is still safe.",{"id":136,"publish_date":137,"is_original":23,"collection":138,"cover_url":139,"cover_url_1_1":140,"title":141,"summary":142,"author":28},101,"2024-12-14","#Nvidia #AI 3D Generator","article_res/cover/693e07c85980c5c0c8fde3f037733f23.jpeg","article_res/cover/9ea8edff2d5d303ff3fffff3f6f9c3d9.jpeg","NVIDIA's open-source 3D project LLaMA-Mesh","LLaMA-Mesh: Unifying 3D Mesh Generation with Language Models",{"id":144,"publish_date":145,"is_original":23,"collection":146,"cover_url":147,"cover_url_1_1":148,"title":149,"summary":150,"author":28},131,"2024-11-10","#OpenAI","article_res/cover/87f8ed353ce39f31960e7cdfaf075a35.jpeg","article_res/cover/f597a63935f5cd32e484b4aadd6019e8.jpeg","ChatGPT has launched the Search function","Get fast, timely answers with links to relevant web 
sources.",{"big":152,"small":214},[153,181],{"title":154,"list":155},"AGENT",[156,157,165,173],{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":158,"publish_date":159,"is_original":23,"collection":160,"cover_url":161,"cover_url_1_1":162,"title":163,"summary":164,"author":28},622,"2025-03-24","#OWL #AI Agent #MAS #MCP #CUA","article_res/cover/cb50ca7f2bf4d1ed50202d7406e1c19a.jpeg","article_res/cover/4aa7aa3badfacf3cc84121334f1050dd.jpeg","OWL: Multi-agent collaboration","OWL: Optimized Workforce Learning for General Multi-Agent Assistance in Real-World Task Automation",{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},620,"2025-03-26","#LLM #Google #Gemini #AI Agent","article_res/cover/53751a6dbbe990b1eb0b63f3b062aed4.jpeg","article_res/cover/031344981f0a212ff82d1f3a64aa5756.jpeg","Gemini 2.5 Pro, claimed to be far ahead of the competition, has been released with great fanfare: comprehensively surpassing other LLMs and topping the global rankings","Gemini 2.5: Our most intelligent AI model",{"id":174,"publish_date":175,"is_original":23,"collection":176,"cover_url":177,"cover_url_1_1":178,"title":179,"summary":180,"author":28},616,"2025-03-29","#MAS #AI Agent #AI Coder #MetaGPT #MGX","article_res/cover/9dcd702ad2035902e5e77967c34a1f1e.jpeg","article_res/cover/0a97fc4a922753c8f46ff38792020df8.jpeg","MGX - An automated website-building platform composed of multiple AI Agents","Your 24/7 AI Team | Dream, Chat, Create.",{"title":182,"list":183},"OPENAI",[184,191,199,206],{"id":185,"publish_date":167,"is_original":23,"collection":186,"cover_url":187,"cover_url_1_1":188,"title":189,"summary":190,"author":28},619,"#OpenAI #AI Image Generator #4o #MMM #AR Transformer","article_res/cover/2faffc97fcecf3151552cb0fd3206d89.jpeg","article_res/cover/1133cb4948af44cee2e7fbe79efb69e5.jpeg","The native 
image function of GPT-4o is officially launched","Introducing 4o Image Generation",{"id":192,"publish_date":193,"is_original":4,"collection":194,"cover_url":195,"cover_url_1_1":196,"title":197,"summary":198,"author":28},434,"2023-07-15","#Anthropic #OpenAI #Google #AI Code Generator #Claude","article_res/cover/e1b6f600a2b9f262a4392684e5f2ce25.jpeg","article_res/cover/6e1772e83f78f9a351ab23d3e414adee.jpeg","Latest Updates on Google Bard /Anthropic Claude2 / ChatGPT Code Interpreter","We want our models to use their programming skills to provide more natural interfaces to the basic functions of our computers.  \n - OpenAI",{"id":200,"publish_date":201,"is_original":4,"collection":146,"cover_url":202,"cover_url_1_1":203,"title":204,"summary":205,"author":28},417,"2023-08-24","article_res/cover/bccf897d50a88b18364e35f7466387e0.jpeg","article_res/cover/2f871085c1073717c1703ae86e18056f.jpeg","The GPT-3.5 Turbo fine-tuning (fine-tuning function) has been released～","Developers can now bring their own data to customize GPT-3.5 Turbo for their use cases.",{"id":207,"publish_date":208,"is_original":4,"collection":209,"cover_url":210,"cover_url_1_1":211,"title":212,"summary":213,"author":28},407,"2023-09-22","#OpenAI #AI Image Generator","article_res/cover/c59005e903d35cfc32346e2756e2728a.jpeg","article_res/cover/ba011d265e6d84b5c8cb6fd6b757b6cc.jpeg","Dall-E 3","DALL·E 3 understands significantly more nuance and detail, allowing you to easily translate your ideas into 
images.",[215,221,241],{"title":10,"list":216},[217,218,219,220],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":223},"GOOGLE",[224,225,226,234],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},615,"2025-03-30","#AI Researcher #AI Science #HKU #Google #AI Agent","article_res/cover/21fadf906067714bb0db31ae13a77c15.jpeg","article_res/cover/2697999a72bd26b22e85f0e92936d3ed.jpeg","AI-Researcher: LLM-driven全自动 scientific research assistant","AI-Researcher: Fully-Automated Scientific Discovery with LLM Agents  \nOpen-Sourced Alternative to Google AI Co-Scientist",{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},463,"2023-05-09","article_res/cover/89800f207723acdb55fc53bf999ebdc9.jpeg","article_res/cover/5764f369b4accd8f83e94aa4c077a175.jpeg","The Smallville sandbox world - A town with 25 virtual residents","Believable proxies of human behavior can empower interactive apps: Immersive environment, Rehearsal space, Prototyping 
tool",{"title":242,"list":243},"NVIDIA",[],true,{"code":4,"msg":5,"data":246},{"id":247,"publish_date":248,"is_original":23,"collection":249,"articles_id":250,"cover_url":251,"cover_url_1_1":252,"title":253,"summary":254,"author":28,"content":255,"popular":256,"list":313,"category":371,"tag":372},70,"2025-01-15","#Google #LLM","PxWmCIIQFQqg8x7_tGbVtQ","article_res/cover/b014765b22653d87d4b09437b3578b8d.jpeg","article_res/cover/714f4bec5c754f37ed7679e409f188f9.jpeg","Google has released TITANS, the successor to the Transformer architecture","Titans: Learning to Memorize at Test Time","\u003Cdiv class=\"rich_media_content js_underline_content\n                       autoTypeSetting24psection\n            \" id=\"js_content\">\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>First, the term \"successor to the Transformer architecture\" above 👆🏻 is something I saw someone use on x.com. 
I don't have the capability to judge the importance of TITANS; I'm just learning about it.\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>The attention mechanism (Attention) has been key to the progress of most large language models (LLMs), but it cannot scale to long contexts.\u003C/p>\u003Cblockquote style='margin-top: 20px;margin-bottom: 20px;padding: 10px 10px 10px 20px;border-top: 3px none rgba(0, 0, 0, 0.4);border-right: 3px none rgba(0, 0, 0, 0.4);border-bottom: 3px none rgba(0, 0, 0, 0.4);border-left-color: rgba(0, 0, 0, 0.4);color: rgb(0, 0, 0);font-size: 16px;cursor: pointer;border-radius: 0px;background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0.05);width: auto;height: auto;box-shadow: rgba(0, 0, 0, 0) 0px 0px 0px 0px;overflow: auto;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;'>\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;cursor: pointer;line-height: 1.8em;letter-spacing: 0em;text-indent: 0em;\">“The true art of memory is the art of attention!\"\u003C/p>\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;cursor: pointer;line-height: 1.8em;letter-spacing: 0em;text-indent: 0em;\">— Samuel Johnson, 1787\u003C/p>\u003C/blockquote>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>TITANS is a new architecture that combines the attention mechanism with a meta context memory, allowing it to learn how to remember at test time. 
Compared to Transformers and modern linear RNNs, TITANS outperforms them in performance and can effectively scale to a context window of over 2M, surpassing even very large models like GPT-4 and Llama3-80B.\u003C/p>\u003Csection data-mpa-template=\"t\" mpa-from-tpl=\"t\">\u003Csection data-mpa-template-id=\"1024\" data-mpa-category=\"模板\" mpa-from-tpl=\"t\" data-mpa-material-zoom=\"0\">\u003Csection data-mpa-category=\"模板\" yb-mpa-mark=\"mark-main\" style=\"width: 100%;padding: 0 14px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 100%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"padding: 12px 12px 21px 12px;border-top: 2px solid #1D7850;border-right: 2px solid #1D7850;border-left: 2px solid #1D7850;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"display: flex;justify-content: center;align-items: center;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cimg class=\"rich_pages wxw-img\" data-cropselx1=\"0\" data-cropselx2=\"522\" data-cropsely1=\"0\" data-cropsely2=\"690\" data-imgfileid=\"100009203\" data-ratio=\"1.1027777777777779\" data-w=\"1080\" style=\"width: 578px;height: 637px;\" src=\"./assets/17423771795820.9296873986667242.jpeg\">\u003C/section>\u003C/section>\u003Csection style=\"width: 100%;display: flex;align-items: flex-start;justify-content: space-between;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 60px;height: 2px;border-top: 2px solid #1D7850;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr mpa-from-tpl=\"t\">\u003C/section>\u003Csection data-mid=\"\" mpa-from-tpl=\"t\" style=\"margin-top: -11px;margin-bottom: 0px;text-align: center;\">\u003Cp data-mid=\"\" style=\"font-weight: bold;font-size: 18px;color: rgb(228, 91, 85);line-height: 25px;word-break: break-word;\">https://arxiv.org/pdf/2501.00663v1\u003C/p>\u003C/section>\u003Csection data-mid=\"\" mpa-from-tpl=\"t\" style=\"margin-bottom: 0px;width: 60px;height: 2px;border-top: 2px solid rgb(29, 120, 80);\">\u003Cbr mpa-from-tpl=\"t\" 
style=\"letter-spacing: 0.578px;\">\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>In summary, the introduction of the TITANS architecture is an innovative attempt to address the problem of long contexts and improve memory capabilities. Compared to traditional Transformer architectures, TITANS maintains efficient performance over a larger context window and can dynamically remember during testing, demonstrating its potential.\u003C/p>\u003Csection data-mpa-template=\"t\" mpa-from-tpl=\"t\">\u003Csection style=\"display: flex;justify-content: center;align-items: center;width: 100%;padding: 0px 6px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"display: flex;justify-content: flex-start;align-items: center;flex-direction: column;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 14px;height: 37px;align-self: flex-start;display: flex;justify-content: center;align-items: center;margin-left: -20.1px;margin-top: 10px;margin-bottom: -27.1px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cimg class=\"rich_pages wxw-img\" data-imgfileid=\"100009168\" data-ratio=\"2.7142857142857144\" data-w=\"28\" src=\"./assets/17423771795750.31696045001723916.png\">\u003C/section>\u003Csection style=\"display: flex;justify-content: center;align-items: center;align-self: flex-end;margin-right: 8px;margin-bottom: 5px;z-index: 1;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 3px;height: 3px;background: #FFFFFF;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003Csection style=\"width: 3px;height: 3px;background: #FFDDDD;border-radius: 50%;margin-left: 5px;margin-right: 5px;\" data-mid=\"\" 
mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003Csection style=\"width: 3px;height: 3px;background: #FFE730;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003C/section>\u003Csection style=\"width: 8px;height: 10px;align-self: flex-end;margin-right: -12.1px;margin-bottom: -20.1px;display: flex;justify-content: center;align-items: center;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cimg class=\"rich_pages wxw-img\" data-imgfileid=\"100009169\" data-ratio=\"1.25\" data-w=\"16\" src=\"./assets/17423771795740.2483567694571971.png\">\u003C/section>\u003Csection style=\"text-align: left;padding: 6px 14px 7px 15px;border-width: 7px 1px 1px;border-style: solid;border-color: rgb(0, 0, 0);\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cp style=\"font-size: 16px;font-family: PingFangSC-Semibold, PingFang SC;font-weight: bold;color: #333333;line-height: 22px;letter-spacing: 1px;\" data-mid=\"\">How to design long-term memory?\u003C/p>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>The TITANS team approached this question from the perspective of human memory. Human short-term memory is very accurate but has a limited window (about 30 seconds). So, how to handle longer contexts? 
The TITANS team used other types of memory systems to store potentially useful information.\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>They believe that the attention mechanism, due to its limited context window and accurate dependency modeling, serves as short-term memory. Therefore, TITANS needs a neural network memory module that can remember a longer history as a long-term and more persistent memory.\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>The main challenge: the memory system is responsible for storing information, but remembering training data may be useless during testing because the test data distribution may differ from the training data. 
Therefore, the TITANS team needs to teach the memory module how to remember/forget information during testing.\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>To this end, the TITANS team proposed: encoding past history into the parameters of the neural network (similar to TTT) and training an online meta-model to learn how to remember/forget data during testing.\u003C/p>\u003Csection data-mpa-template=\"t\" mpa-from-tpl=\"t\">\u003Csection style=\"display: flex;justify-content: center;align-items: center;width: 100%;padding: 0px 6px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"display: flex;justify-content: flex-start;align-items: center;flex-direction: column;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 14px;height: 37px;align-self: flex-start;display: flex;justify-content: center;align-items: center;margin-left: -20.1px;margin-top: 10px;margin-bottom: -27.1px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cimg data-imgfileid=\"100009171\" data-ratio=\"2.7142857142857144\" data-w=\"28\" src=\"./assets/17423771795740.6007350348544211.png\">\u003C/section>\u003Csection style=\"display: flex;justify-content: center;align-items: center;align-self: flex-end;margin-right: 8px;margin-bottom: 5px;z-index: 1;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 3px;height: 3px;background: #FFFFFF;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003Csection style=\"width: 3px;height: 3px;background: #FFDDDD;border-radius: 50%;margin-left: 5px;margin-right: 5px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003Csection style=\"width: 3px;height: 3px;background: #FFE730;border-radius: 50%;\" data-mid=\"\" 
mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003C/section>\u003Csection style=\"width: 8px;height: 10px;align-self: flex-end;margin-right: -12.1px;margin-bottom: -20.1px;display: flex;justify-content: center;align-items: center;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cimg class=\"rich_pages wxw-img\" data-imgfileid=\"100009170\" data-ratio=\"1.25\" data-w=\"16\" src=\"./assets/17423771797360.07401385131989735.png\">\u003C/section>\u003Csection style=\"text-align: left;padding: 6px 14px 7px 15px;border-width: 7px 1px 1px;border-style: solid;border-color: rgb(0, 0, 0);\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cp style=\"font-size: 16px;font-family: PingFangSC-Semibold, PingFang SC;font-weight: bold;color: #333333;line-height: 22px;letter-spacing: 1px;\" data-mid=\"\">Which tokens need to be remembered?\u003C/p>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>The TITANS team again approached this question from the perspective of human memory. The human brain prioritizes remembering events that defy expectations (i.e., surprising events). However, although an event may be surprising at one moment, it may not continue to surprise us. 
The initial moment is enough to draw attention, thus remembering the entire timeframe.\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>The TITANS team simulated this process to train long-term memory, dividing the surprise of a token into:\u003C/p>\u003Col style='margin-top: 8px;margin-bottom: 8px;cursor: pointer;padding-left: 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);' class=\"list-paddingleft-1\">\u003Cli style=\"cursor: pointer;\">\u003Csection style=\"cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">\u003Cstrong style=\"cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">Instantaneous surprise\u003C/strong>\u003C/section>\u003C/li>\u003Cli style=\"cursor: pointer;\">\u003Csection style=\"cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">\u003Cstrong style=\"cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">(Decaying) past surprise\u003C/strong>\u003C/section>\u003C/li>\u003C/ol>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, 
\"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>Instantaneous surprise is measured by the gradient between the memory and the incoming token, while past surprise is the decaying cumulative value of past tokens.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100009182\" data-ratio=\"0.16296296296296298\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"1080\" style=\"\" src=\"./assets/17423771798170.9766011693633585.jpeg\">\u003C/p>\u003Csection data-mpa-template=\"t\" mpa-from-tpl=\"t\">\u003Csection style=\"display: flex;justify-content: center;align-items: center;width: 100%;padding: 0px 6px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"display: flex;justify-content: flex-start;align-items: center;flex-direction: column;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 14px;height: 37px;align-self: flex-start;display: flex;justify-content: center;align-items: center;margin-left: -20.1px;margin-top: 10px;margin-bottom: -27.1px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cimg data-imgfileid=\"100009172\" data-ratio=\"2.7142857142857144\" data-w=\"28\" src=\"./assets/17423771797270.6140339382779112.png\">\u003C/section>\u003Csection style=\"display: flex;justify-content: center;align-items: center;align-self: flex-end;margin-right: 8px;margin-bottom: 5px;z-index: 1;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 3px;height: 3px;background: #FFFFFF;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003Csection style=\"width: 3px;height: 3px;background: #FFDDDD;border-radius: 50%;margin-left: 5px;margin-right: 5px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003Csection style=\"width: 3px;height: 3px;background: #FFE730;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003C/section>\u003Csection style=\"width: 8px;height: 10px;align-self: 
flex-end;margin-right: -12.1px;margin-bottom: -20.1px;display: flex;justify-content: center;align-items: center;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cimg data-imgfileid=\"100009173\" data-ratio=\"1.25\" data-w=\"16\" src=\"./assets/17423771795820.07590436488943664.png\">\u003C/section>\u003Csection style=\"text-align: left;padding: 6px 14px 7px 15px;border-width: 7px 1px 1px;border-style: solid;border-color: rgb(0, 0, 0);\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cp style=\"font-size: 16px;font-family: PingFangSC-Semibold, PingFang SC;font-weight: bold;color: #333333;line-height: 22px;letter-spacing: 1px;\" data-mid=\"\">How is memory forgotten?\u003C/p>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>The TITANS team used a weight decay term in the memory update rule. 
Interestingly, this weight decay can be seen as a generalized form of data-dependent gating in RNNs, utilizing matrix or vector-valued memory.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100009183\" data-ratio=\"0.12314814814814815\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"1080\" style=\"\" src=\"./assets/17423771795760.6322422517538382.jpeg\">\u003C/p>\u003Csection data-mpa-template=\"t\" mpa-from-tpl=\"t\">\u003Csection style=\"display: flex;justify-content: center;align-items: center;width: 100%;padding: 0px 6px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"display: flex;justify-content: flex-start;align-items: center;flex-direction: column;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 14px;height: 37px;align-self: flex-start;display: flex;justify-content: center;align-items: center;margin-left: -20.1px;margin-top: 10px;margin-bottom: -27.1px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cimg data-imgfileid=\"100009175\" data-ratio=\"2.7142857142857144\" data-w=\"28\" src=\"./assets/17423771798290.40493392122197336.png\">\u003C/section>\u003Csection style=\"display: flex;justify-content: center;align-items: center;align-self: flex-end;margin-right: 8px;margin-bottom: 5px;z-index: 1;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 3px;height: 3px;background: #FFFFFF;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003Csection style=\"width: 3px;height: 3px;background: #FFDDDD;border-radius: 50%;margin-left: 5px;margin-right: 5px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003Csection style=\"width: 3px;height: 3px;background: #FFE730;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003C/section>\u003Csection style=\"width: 8px;height: 10px;align-self: flex-end;margin-right: -12.1px;margin-bottom: -20.1px;display: flex;justify-content: center;align-items: 
center;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cimg data-imgfileid=\"100009174\" data-ratio=\"1.25\" data-w=\"16\" src=\"./assets/17423771795770.4555224796705941.png\">\u003C/section>\u003Csection style=\"text-align: left;padding: 6px 14px 7px 15px;border-width: 7px 1px 1px;border-style: solid;border-color: rgb(0, 0, 0);\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cp style=\"font-size: 16px;font-family: PingFangSC-Semibold, PingFang SC;font-weight: bold;color: #333333;line-height: 22px;letter-spacing: 1px;\" data-mid=\"\">Is this design parallelizable?\u003C/p>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>and combined it with weight decay through additional matrix multiplication. So, how to handle the decaying past surprise? 
The TITANS team realized that it could be calculated through parallel scanning within each mini-batch.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100009184\" data-ratio=\"0.26666666666666666\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"1080\" style=\"\" src=\"./assets/17423771809620.7198599840749913.jpeg\">\u003C/p>\u003Csection data-mpa-template=\"t\" mpa-from-tpl=\"t\">\u003Csection style=\"display: flex;justify-content: center;align-items: center;width: 100%;padding: 0px 6px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"display: flex;justify-content: flex-start;align-items: center;flex-direction: column;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 14px;height: 37px;align-self: flex-start;display: flex;justify-content: center;align-items: center;margin-left: -20.1px;margin-top: 10px;margin-bottom: -27.1px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cimg class=\"rich_pages wxw-img\" data-imgfileid=\"100009176\" data-ratio=\"2.7142857142857144\" data-w=\"28\" src=\"./assets/17423771797280.4037294704491825.png\">\u003C/section>\u003Csection style=\"display: flex;justify-content: center;align-items: center;align-self: flex-end;margin-right: 8px;margin-bottom: 5px;z-index: 1;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 3px;height: 3px;background: #FFFFFF;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003Csection style=\"width: 3px;height: 3px;background: #FFDDDD;border-radius: 50%;margin-left: 5px;margin-right: 5px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003Csection style=\"width: 3px;height: 3px;background: #FFE730;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003C/section>\u003Csection style=\"width: 8px;height: 10px;align-self: flex-end;margin-right: -12.1px;margin-bottom: -20.1px;display: flex;justify-content: center;align-items: center;\" 
data-mid=\"\" mpa-from-tpl=\"t\">\u003Cimg data-imgfileid=\"100009177\" data-ratio=\"1.25\" data-w=\"16\" src=\"./assets/17423771795800.8474519930792841.png\">\u003C/section>\u003Csection style=\"text-align: left;padding: 6px 14px 7px 15px;border-width: 7px 1px 1px;border-style: solid;border-color: rgb(0, 0, 0);\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cp style=\"font-size: 16px;font-family: PingFangSC-Semibold, PingFang SC;font-weight: bold;color: #333333;line-height: 22px;letter-spacing: 1px;\" data-mid=\"\">How to integrate memory?\u003C/p>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>The TITANS team demonstrated three architectural variants where memory can serve as:\u003C/p>\u003Col style='margin-top: 8px;margin-bottom: 8px;cursor: pointer;padding-left: 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);' class=\"list-paddingleft-1\">\u003Cli style=\"cursor: pointer;\">\u003Csection style=\"cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">\u003Cstrong style=\"cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">context\u003C/strong>\u003C/section>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100009185\" data-ratio=\"0.45\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"1080\" style=\"\" 
src=\"./assets/17423771805050.7307751076615703.jpeg\">\u003C/p>\u003C/li>\u003Cli style=\"cursor: pointer;\">\u003Csection style=\"cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">\u003Cstrong style=\"cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">head\u003C/strong>\u003C/section>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100009202\" data-ratio=\"0.45\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"1080\" style=\"\" src=\"./assets/17423771804820.21356340266253326.jpeg\">\u003C/p>\u003C/li>\u003Cli style=\"cursor: pointer;\">\u003Csection style=\"cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">\u003Cstrong style=\"cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">layer\u003C/strong>\u003C/section>\u003C/li>\u003C/ol>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>divides the input into segments (which can be large, even equal to the context window of current attention-based large language models) and uses past memory states to extract corresponding memories, which are then updated through attention outputs.\u003C/p>\u003Csection data-mpa-template=\"t\" mpa-from-tpl=\"t\">\u003Csection style=\"display: flex;justify-content: center;align-items: center;width: 
100%;padding: 0px 6px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"display: flex;justify-content: flex-start;align-items: center;flex-direction: column;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 14px;height: 37px;align-self: flex-start;display: flex;justify-content: center;align-items: center;margin-left: -20.1px;margin-top: 10px;margin-bottom: -27.1px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cimg class=\"rich_pages wxw-img\" data-imgfileid=\"100009178\" data-ratio=\"2.7142857142857144\" data-w=\"28\" src=\"./assets/17423771795790.24203358995553037.png\">\u003C/section>\u003Csection style=\"display: flex;justify-content: center;align-items: center;align-self: flex-end;margin-right: 8px;margin-bottom: 5px;z-index: 1;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 3px;height: 3px;background: #FFFFFF;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003Csection style=\"width: 3px;height: 3px;background: #FFDDDD;border-radius: 50%;margin-left: 5px;margin-right: 5px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003Csection style=\"width: 3px;height: 3px;background: #FFE730;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003C/section>\u003Csection style=\"width: 8px;height: 10px;align-self: flex-end;margin-right: -12.1px;margin-bottom: -20.1px;display: flex;justify-content: center;align-items: center;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cimg data-imgfileid=\"100009179\" data-ratio=\"1.25\" data-w=\"16\" src=\"./assets/17423771797280.7438169491539515.png\">\u003C/section>\u003Csection style=\"text-align: left;padding: 6px 14px 7px 15px;border-width: 7px 1px 1px;border-style: solid;border-color: rgb(0, 0, 0);\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cp style=\"font-size: 16px;font-family: PingFangSC-Semibold, PingFang SC;font-weight: bold;color: #333333;line-height: 22px;letter-spacing: 1px;\" data-mid=\"\">TITANS' performance in 
experiments\u003C/p>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>The TITANS team focused on language modeling, common sense reasoning, \"needle in a haystack,\" and time series prediction tasks,\u003C/p>\u003Cp>\u003Cimg class=\"rich_pages wxw-img\" data-imgfileid=\"100009198\" data-ratio=\"1.0972222222222223\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"1080\" style=\"\" src=\"./assets/17423771814550.5832040416976465.jpeg\">\u003C/p>\u003Cp>\u003Cimg class=\"rich_pages wxw-img\" data-imgfileid=\"100009190\" data-ratio=\"0.3148148148148148\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"1080\" style=\"\" src=\"./assets/17423771805990.23428925418218038.jpeg\">\u003C/p>\u003Cp>\u003Cimg class=\"rich_pages wxw-img\" data-imgfileid=\"100009189\" data-ratio=\"0.29074074074074074\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"./assets/17423771795900.9226449062306954.png\">\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>GPT-4 and Llama3-80B.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100009192\" data-ratio=\"0.40185185185185185\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"1080\" style=\"\" src=\"./assets/17423771797460.9067818671890819.jpeg\">\u003C/p>\u003Csection data-mpa-template=\"t\" mpa-from-tpl=\"t\">\u003Csection style=\"display: flex;justify-content: center;align-items: center;width: 100%;padding: 0px 6px;\" 
data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"display: flex;justify-content: flex-start;align-items: center;flex-direction: column;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 14px;height: 37px;align-self: flex-start;display: flex;justify-content: center;align-items: center;margin-left: -20.1px;margin-top: 10px;margin-bottom: -27.1px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cimg class=\"rich_pages wxw-img\" data-imgfileid=\"100009181\" data-ratio=\"2.7142857142857144\" data-w=\"28\" src=\"./assets/17423771797470.4529312295553136.png\">\u003C/section>\u003Csection style=\"display: flex;justify-content: center;align-items: center;align-self: flex-end;margin-right: 8px;margin-bottom: 5px;z-index: 1;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Csection style=\"width: 3px;height: 3px;background: #FFFFFF;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003Csection style=\"width: 3px;height: 3px;background: #FFDDDD;border-radius: 50%;margin-left: 5px;margin-right: 5px;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003Csection style=\"width: 3px;height: 3px;background: #FFE730;border-radius: 50%;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cbr>\u003C/section>\u003C/section>\u003Csection style=\"width: 8px;height: 10px;align-self: flex-end;margin-right: -12.1px;margin-bottom: -20.1px;display: flex;justify-content: center;align-items: center;\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cimg data-imgfileid=\"100009180\" data-ratio=\"1.25\" data-w=\"16\" src=\"./assets/17423771797360.3545994181030392.png\">\u003C/section>\u003Csection style=\"text-align: left;padding: 6px 14px 7px 15px;border-width: 7px 1px 1px;border-style: solid;border-color: rgb(0, 0, 0);\" data-mid=\"\" mpa-from-tpl=\"t\">\u003Cp style=\"font-size: 16px;font-family: PingFangSC-Semibold, PingFang SC;font-weight: bold;color: #333333;line-height: 22px;letter-spacing: 1px;\" 
data-mid=\"\">Summary\u003C/p>\u003C/section>\u003C/section>\u003C/section>\u003C/section>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>The TITANS architecture demonstrates how to solve the problem of long contexts by combining a dynamic memory module with the attention mechanism. It far exceeds existing Transformer and RNN architectures in performance (according to the authors) and, through diverse memory mechanisms, can handle different tasks, showcasing its advantages in processing large context windows.\u003C/p>\u003Cp style=\"display: none;\">\u003Cmp-style-type data-value=\"3\">\u003C/mp-style-type>\u003C/p>\u003C/div>",[257,261,269,278,286,289,297,305],{"id":185,"title_md5":258,"publish_date":167,"author_md5":259,"is_original":23,"collection":186,"summary_md5":260,"cover_url":187,"cover_url_1_1":188},"e73415acabc9bb396be72a35c59fc6de","bc27fa490c4d0d525bac812fc0793534","8886f8e44abb23e325ac5af11848a6f8",{"id":262,"title_md5":263,"publish_date":264,"author_md5":259,"is_original":23,"collection":265,"summary_md5":266,"cover_url":267,"cover_url_1_1":268},272,"d6fa93d639e537e37f5c1f4c299e9343","2024-05-18","#AI Avatar","53e1e5bd4dee1f8c4d536ad6dfc4ed32","article_res/cover/9820666229716f76781eb3ec035bc90e.jpeg","article_res/cover/dabaa35458f0c8f35d6df114336f0091.jpeg",{"id":270,"title_md5":271,"publish_date":272,"author_md5":273,"is_original":4,"collection":274,"summary_md5":275,"cover_url":276,"cover_url_1_1":277},166,"84bee74fc688483d7f09a720d65d035f","2024-09-28","fb0edf26dea7e6e8b89a99bc9d3a3170","#History of Intelligence 
#Neuroscience","65de02c4c252f8549f2c8cf3b410395d","article_res/cover/f8369fb63aabb4d9022b46f4e4786223.jpeg","article_res/cover/a89cf18d49193ae495dffd23bd3623e7.jpeg",{"id":279,"title_md5":280,"publish_date":281,"author_md5":259,"is_original":23,"collection":282,"summary_md5":283,"cover_url":284,"cover_url_1_1":285},159,"248dd6e4a24fadf8cbf9b2af9842bad8","2024-10-05","#AI Video Generator #Meta #AI Video Editor","07b54e7f8d863dd4a63548b9be016d5f","article_res/cover/9ef71241f1bd883f1b09e353139e324b.jpeg","article_res/cover/32e019ffdaf19f1a17ade55ec1caaf2f.jpeg",{"id":63,"title_md5":287,"publish_date":64,"author_md5":259,"is_original":23,"collection":65,"summary_md5":288,"cover_url":66,"cover_url_1_1":67},"dbee0f3a7435bdae29375353c8916144","5ec7a49091386e6ea5cb0782995c4195",{"id":290,"title_md5":291,"publish_date":292,"author_md5":259,"is_original":23,"collection":293,"summary_md5":294,"cover_url":295,"cover_url_1_1":296},173,"aabb53182484ffd78a454dda357a05e8","2024-09-19","#All-In Summit 2024 #Entrepreneurship #Nvidia","73305c97f175c3a89c05f5d890250603","article_res/cover/cff1aaf74ae5b1f705021906cd457d1e.jpeg","article_res/cover/0e563388215b53402905d9e559675faa.jpeg",{"id":298,"title_md5":299,"publish_date":300,"author_md5":259,"is_original":23,"collection":301,"summary_md5":302,"cover_url":303,"cover_url_1_1":304},28,"be6612d79d187bbb070061a7099947f7","2025-02-24","#Andrej Karpathy #Deep Dive into LLMs #LLM #RL #DeepSeek","44b2596e7fcb916bb7ff3a443e4e4fd6","article_res/cover/db62a8c43fa565112c0aefa8776e1de2.jpeg","article_res/cover/cbb4289951cb6a0cda64f8fd913a2e23.jpeg",{"id":306,"title_md5":307,"publish_date":308,"author_md5":259,"is_original":4,"collection":309,"summary_md5":310,"cover_url":311,"cover_url_1_1":312},385,"86e3808158857aa9cb32fd04995bf4ce","2023-10-26","#Stable Diffusion #AI 
Animation","d2cb1645163307559107cd8ae6ceb914","article_res/cover/daf53481b94fc0c5e57c8105b63673da.jpeg","article_res/cover/a0344c987723c4015380d16b83234750.jpeg",{"related":314,"small":356},[315,323,331,339,347],{"id":316,"publish_date":317,"is_original":23,"collection":318,"cover_url":319,"cover_url_1_1":320,"title":321,"summary":322,"author":28},356,"2023-12-29","#Prompt Engineering #LLM #Langchain","article_res/cover/b0f8821a7f4dc973dab6a6cd5c8af14a.jpeg","article_res/cover/47f95d0ae22c01db9fc24a4a3380f202.jpeg","How to make LLMs smarter","This guide shares strategies and tactics for getting better results from large language models.",{"id":324,"publish_date":325,"is_original":4,"collection":326,"cover_url":327,"cover_url_1_1":328,"title":329,"summary":330,"author":28},462,"2023-05-10","#Prompt Engineering","article_res/cover/0fa06d20a89edca8b63f9e4bffba7194.jpeg","article_res/cover/a562f93946c8c8894bc795a01ae9b0ba.jpeg","ChatGPT Prompt Writing Guide 🧭","It is crucial to understand the proper way to ask ChatGPT in order to obtain the high-quality outputs we desire.",{"id":332,"publish_date":333,"is_original":23,"collection":334,"cover_url":335,"cover_url_1_1":336,"title":337,"summary":338,"author":28},59,"2025-01-25","#LLM #DeepSeek #RL #Distillation","article_res/cover/110330c9174424ce76666ff1ebfe0b67.jpeg","article_res/cover/6c8fe943cee312750ef7c49d673f63f8.jpeg","Paper of DeepSeek-R1: Exploration and Breakthrough of the New Generation Inference Model","DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning",{"id":340,"publish_date":341,"is_original":23,"collection":342,"cover_url":343,"cover_url_1_1":344,"title":345,"summary":346,"author":28},345,"2024-01-11","#AI Avatar #Alibaba","article_res/cover/e00ea19d0fed3791dc0391d6c0c2a526.jpeg","article_res/cover/b4a6e2e96b26826bbe22565adc089996.jpeg","Alibaba's DreaMoving: A character video generation framework based on diffusion models.","DreaMoving is a diffusion-based controllable 
video generation framework to produce high-quality customized human videos.",{"id":348,"publish_date":349,"is_original":4,"collection":350,"cover_url":351,"cover_url_1_1":352,"title":353,"summary":354,"author":355},589,"2022-03-28","#History","article_res/cover/0542a11a661c0beab5c7f6f4c69fa430.jpeg","article_res/cover/a4173a2e5858eb51b7899361274f65d0.jpeg","What are \"history\" and \"historiography\"?","Who controls the past controls the future: who controls the present controls the past.","Course notes",[357,363,369],{"title":10,"list":358},[359,360,361,362],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":364},[365,366,367,368],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},{"title":242,"list":370},[],[8,9,10],[8,12,13,14,9,10,15,16,17,18],["Reactive",245],1754646409441]