[{"data":1,"prerenderedAt":375},["ShallowReactive",2],{"$fgukOamtKU1RtUiMFsqdObttmqPPQz0uc7bl_gj_LyX0":3,"$fz9OjKIJcWU7Hcf_DEw8ahJ3YnqPGLz91Zw6OBbbRDQo":245,"article-331":374},{"code":4,"msg":5,"data":6},0,"",{"category":7,"tag":11,"popular":19,"latest":86,"banner":126,"list":151,"cache":244},[8,9,10],"Agent","OpenAI","LLM",[8,12,13,14,9,10,15,16,17,18],"Google","Nvidia","Claude","DeepSeek","OCR","Chat","Generator",[20,29,37,45,54,62,70,79],{"id":21,"publish_date":22,"is_original":23,"collection":5,"cover_url":24,"cover_url_1_1":25,"title":26,"summary":27,"author":28},411,"2023-09-10",1,"article_res/cover/451ef50c225a8dc61c4336506794d13b.jpeg","article_res/cover/3ba9dc7a72f87d40b20fc2d225289ee3.jpeg","Idealism","Reality is created by the mind, we can change our reality by changing our mind. - Plato","Renee's Entrepreneurial Journey",{"id":30,"publish_date":31,"is_original":23,"collection":32,"cover_url":33,"cover_url_1_1":34,"title":35,"summary":36,"author":28},108,"2024-12-07","#LLM #AGI #AI Agent","article_res/cover/0039044422e4ec9f61c18e8ee1693bb0.jpeg","article_res/cover/4220971b108a91d21407d87bb02fbaa6.jpeg","Freysa.ai: The World's First Adversarial AI Agent Game","说服 Freysa 把钱包里的钱都拿出来",{"id":38,"publish_date":39,"is_original":23,"collection":40,"cover_url":41,"cover_url_1_1":42,"title":43,"summary":44,"author":28},12,"2025-03-09","#Oxford #Reasoning #LLM #Tool Use","article_res/cover/d448e9b3617a0b5302e1bd10c438bca9.jpeg","article_res/cover/864a468f9cc4c9317efadb3811909888.jpeg","Agentic Reasoning Framework - Significantly enhance the reasoning ability of LLMs through the integration of external tools using agents","Agentic Reasoning: Reasoning LLMs with Tools for Deep Research",{"id":46,"publish_date":47,"is_original":4,"collection":48,"cover_url":49,"cover_url_1_1":50,"title":51,"summary":52,"author":53},480,"2023-04-14","#Stable Diffusion","article_res/cover/0bdbe7cb1de4a78e54536e5d9afa7ec9.jpeg","article_res/cover/b3d6ffec0608dcfaf18c5a69906d1490.jpeg","【AIGC Learning】Generate Prompts Using Word Graphs - Stable Diffusion Web UI Series 13","AI will become a powerful tool in education, transforming the way we learn and deliver instruction.  \n- Reid Hoffman","--",{"id":55,"publish_date":56,"is_original":4,"collection":57,"cover_url":58,"cover_url_1_1":59,"title":60,"summary":61,"author":28},413,"2023-09-08","#Neuroscience","article_res/cover/74f8302d78a23d9430f22171eae136b6.jpeg","article_res/cover/87ca08af81bb304746be5261160964c0.jpeg","Can machines be conscious?","Do we have an ethical obligation to not turn off conscious machines? Would turning them off be murder? No. I don't lose any sleep over unplugging a conscious machine.\n- Jeff Hawkins, \"A Thousand Brains\"",{"id":63,"publish_date":64,"is_original":23,"collection":65,"cover_url":66,"cover_url_1_1":67,"title":68,"summary":69,"author":28},178,"2024-09-09","#Entrepreneurship","article_res/cover/a7224f025b55d1820408085faef63079.jpeg","article_res/cover/11a9995b096cbf64465ef01b8673b154.jpeg","37signals company","This damn sense of relaxation",{"id":71,"publish_date":72,"is_original":4,"collection":73,"cover_url":74,"cover_url_1_1":75,"title":76,"summary":77,"author":78},460,"2023-05-12","#Google","article_res/cover/b970687b12faa52da976f91248c2aa7b.jpeg","article_res/cover/d1e71b52cfd2c63bc6e71f3e85ff135c.jpeg","Learn what BRC-20 and Ordinals are using Google Bard","Ordinals - a new protocol that allows users to store arbitrary data on the Bitcoin blockchain","Google Bard mainly writes",{"id":80,"publish_date":81,"is_original":23,"collection":5,"cover_url":82,"cover_url_1_1":83,"title":84,"summary":85,"author":28},309,"2024-03-26","article_res/cover/9877f95894ee88532d0e6012c23a2df3.jpeg","article_res/cover/20092164ddc109ce6ae56b1984246751.jpeg","Learning the Cancun Upgrade with lepton and perplexity","Building a quick conversation-based search demo with Lepton AI.",[87,95,103,111,119],{"id":88,"publish_date":89,"is_original":23,"collection":90,"cover_url":91,"cover_url_1_1":92,"title":93,"summary":94,"author":28},627,"2025-03-20","#AI Avatar #AI Video Generation","article_res/cover/d95481358f73924989f8c4ee9c75d1c8.jpeg","article_res/cover/b74bc0fab01f8b6a6aa87696c0c3ed8b.jpeg","DisPose: Generating Animated Videos by Driving Video with Reference Images","DisPose is a controllable human image animation method that enhances video generation.",{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},626,"2025-03-21","#Deep Dive into LLMs #LLM #RL #Andrej Karpathy #AlphaGo","article_res/cover/446553a5c8f8f2f07d97b20eaee84e56.jpeg","article_res/cover/e6c2823409c9b34624064b9acbaca6f1.jpeg","AlphaGo and the Power of Reinforcement Learning - Andrej Karpathy's Deep Dive on LLMs (Part 9)","Simply learning from humans will never surpass human capabilities.",{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},625,"2025-03-22","#Deep Dive into LLMs #LLM #RL #RLHF #Andrej Karpathy","article_res/cover/8da81d38b1e5cf558a164710fd8a5389.jpeg","article_res/cover/96f028d76c362a99a0dd56389e8f7a9b.jpeg","Reinforcement Learning from Human Feedback (RLHF) - Andrej Karpathy's Deep Dive on LLMs (Part 10)","Fine-Tuning Language Models from Human Preferences",{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},624,"2025-03-23","#Deep Dive into LLMs #LLM #Andrej Karpathy #AI Agent #MMM","article_res/cover/a5e7c3d48bb09109684d6513287c661d.jpeg","article_res/cover/d3f22b7c0ab8d82fd2da457a299e0773.jpeg","The Future of Large Language Models - Andrej Karpathy's In-Depth Explanation of LLM (Part 11)","preview of things to come",{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},623,"#Google #Voe #AI Video Generation","article_res/cover/c44062fea0f336c2b96b3928292392c2.jpeg","article_res/cover/a041041c69092ad3db191c5bf3ff981b.jpeg","Trial of Google's video generation model VOE2","Our state-of-the-art video generation model",[127,135,143],{"id":128,"publish_date":129,"is_original":23,"collection":130,"cover_url":131,"cover_url_1_1":132,"title":133,"summary":134,"author":28},300,"2024-04-16","#AI in Science #AGI","article_res/cover/6bf01e793e0f33e848572412eebdf9b0.jpeg","article_res/cover/91a5ee21dafecb914fabeb9430d46ec1.jpeg","Would Einstein lose his job - AI and Quantum Computing: A Glimpse into the Near Future","So Einstein's job is still safe.",{"id":136,"publish_date":137,"is_original":23,"collection":138,"cover_url":139,"cover_url_1_1":140,"title":141,"summary":142,"author":28},101,"2024-12-14","#Nvidia #AI 3D Generator","article_res/cover/693e07c85980c5c0c8fde3f037733f23.jpeg","article_res/cover/9ea8edff2d5d303ff3fffff3f6f9c3d9.jpeg","NVIDIA's open-source 3D project LLaMA-Mesh","LLaMA-Mesh: Unifying 3D Mesh Generation with Language Models",{"id":144,"publish_date":145,"is_original":23,"collection":146,"cover_url":147,"cover_url_1_1":148,"title":149,"summary":150,"author":28},131,"2024-11-10","#OpenAI","article_res/cover/87f8ed353ce39f31960e7cdfaf075a35.jpeg","article_res/cover/f597a63935f5cd32e484b4aadd6019e8.jpeg","ChatGPT has launched the Search function","Get fast, timely answers with links to relevant web sources.",{"big":152,"small":214},[153,181],{"title":154,"list":155},"AGENT",[156,157,165,173],{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":158,"publish_date":159,"is_original":23,"collection":160,"cover_url":161,"cover_url_1_1":162,"title":163,"summary":164,"author":28},622,"2025-03-24","#OWL #AI Agent #MAS #MCP #CUA","article_res/cover/cb50ca7f2bf4d1ed50202d7406e1c19a.jpeg","article_res/cover/4aa7aa3badfacf3cc84121334f1050dd.jpeg","OWL: Multi-agent collaboration","OWL: Optimized Workforce Learning for General Multi-Agent Assistance in Real-World Task Automation",{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},620,"2025-03-26","#LLM #Google #Gemini #AI Agent","article_res/cover/53751a6dbbe990b1eb0b63f3b062aed4.jpeg","article_res/cover/031344981f0a212ff82d1f3a64aa5756.jpeg","Gemini 2.5 Pro, claimed to be far ahead of the competition, has been released with great fanfare: comprehensively surpassing other LLMs and topping the global rankings","Gemini 2.5: Our most intelligent AI model",{"id":174,"publish_date":175,"is_original":23,"collection":176,"cover_url":177,"cover_url_1_1":178,"title":179,"summary":180,"author":28},616,"2025-03-29","#MAS #AI Agent #AI Coder #MetaGPT #MGX","article_res/cover/9dcd702ad2035902e5e77967c34a1f1e.jpeg","article_res/cover/0a97fc4a922753c8f46ff38792020df8.jpeg","MGX - An automated website-building platform composed of multiple AI Agents","Your 24/7 AI Team | Dream, Chat, Create.",{"title":182,"list":183},"OPENAI",[184,191,199,206],{"id":185,"publish_date":167,"is_original":23,"collection":186,"cover_url":187,"cover_url_1_1":188,"title":189,"summary":190,"author":28},619,"#OpenAI #AI Image Generator #4o #MMM #AR Transformer","article_res/cover/2faffc97fcecf3151552cb0fd3206d89.jpeg","article_res/cover/1133cb4948af44cee2e7fbe79efb69e5.jpeg","The native image function of GPT-4o is officially launched","Introducing 4o Image Generation",{"id":192,"publish_date":193,"is_original":4,"collection":194,"cover_url":195,"cover_url_1_1":196,"title":197,"summary":198,"author":28},434,"2023-07-15","#Anthropic #OpenAI #Google #AI Code Generator #Claude","article_res/cover/e1b6f600a2b9f262a4392684e5f2ce25.jpeg","article_res/cover/6e1772e83f78f9a351ab23d3e414adee.jpeg","Latest Updates on Google Bard /Anthropic Claude2 / ChatGPT Code Interpreter","We want our models to use their programming skills to provide more natural interfaces to the basic functions of our computers.  \n - OpenAI",{"id":200,"publish_date":201,"is_original":4,"collection":146,"cover_url":202,"cover_url_1_1":203,"title":204,"summary":205,"author":28},417,"2023-08-24","article_res/cover/bccf897d50a88b18364e35f7466387e0.jpeg","article_res/cover/2f871085c1073717c1703ae86e18056f.jpeg","The GPT-3.5 Turbo fine-tuning (fine-tuning function) has been released～","Developers can now bring their own data to customize GPT-3.5 Turbo for their use cases.",{"id":207,"publish_date":208,"is_original":4,"collection":209,"cover_url":210,"cover_url_1_1":211,"title":212,"summary":213,"author":28},407,"2023-09-22","#OpenAI #AI Image Generator","article_res/cover/c59005e903d35cfc32346e2756e2728a.jpeg","article_res/cover/ba011d265e6d84b5c8cb6fd6b757b6cc.jpeg","Dall-E 3","DALL·E 3 understands significantly more nuance and detail, allowing you to easily translate your ideas into images.",[215,221,241],{"title":10,"list":216},[217,218,219,220],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":223},"GOOGLE",[224,225,226,234],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},615,"2025-03-30","#AI Researcher #AI Science #HKU #Google #AI Agent","article_res/cover/21fadf906067714bb0db31ae13a77c15.jpeg","article_res/cover/2697999a72bd26b22e85f0e92936d3ed.jpeg","AI-Researcher: LLM-driven全自动 scientific research assistant","AI-Researcher: Fully-Automated Scientific Discovery with LLM Agents  \nOpen-Sourced Alternative to Google AI Co-Scientist",{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},463,"2023-05-09","article_res/cover/89800f207723acdb55fc53bf999ebdc9.jpeg","article_res/cover/5764f369b4accd8f83e94aa4c077a175.jpeg","The Smallville sandbox world - A town with 25 virtual residents","Believable proxies of human behavior can empower interactive apps: Immersive environment, Rehearsal space, Prototyping tool",{"title":242,"list":243},"NVIDIA",[],true,{"code":4,"msg":5,"data":246},{"id":247,"publish_date":248,"is_original":4,"collection":249,"articles_id":250,"cover_url":251,"cover_url_1_1":252,"title":253,"summary":254,"author":28,"content":255,"popular":256,"list":321,"category":372,"tag":373},331,"2024-02-04","#AI Game #AI Agent","38rIBsXEs8fdmNrErYrt0A","article_res/cover/a06fb659ab32b0ddcf3ebb6c330fc35f.jpeg","article_res/cover/df5aa34d411e2e9d1a3115cb816fde82.jpeg","Playing Werewolf game through Reinforcement Learning (RL) Agents","We propose a new framework powered by RL to develop strategic language agents, LLM-based agents for Werewolf.","\u003Cdiv class=\"rich_media_content js_underline_content\n                       autoTypeSetting24psection\n            \" id=\"js_content\">\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>). Later, during an exchange with Professor Wu from Tsinghua University, he mentioned that their improved Agents using reinforcement learning (Reinforcement Learning, RL) performed better in the Werewolf game.\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>\"Strategic Play in the Werewolf Game by Language Agents through Reinforcement Learning\" (LANGUAGE AGENTS WITH REINFORCEMENT LEARNING FOR STRATEGIC PLAY IN THE WEREWOLF GAME) https://arxiv.org/pdf/2310.18940.pdf\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>I had previously bookmarked the paper but never had time to read it carefully until today, when I finally had the opportunity to study it thoroughly ✍️.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100003554\" data-ratio=\"0.5601851851851852\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"./assets/17423813072110.785333149203322.png\">\u003C/p>\u003Ch3 data-tool=\"mdnice编辑器\" style='margin-top: 30px;margin-bottom: 15px;font-weight: bold;font-size: 20px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;letter-spacing: normal;text-align: left;text-wrap: wrap;'>Abstract\u003C/h3>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>This paper primarily explores how to use reinforcement learning (RL) to develop intelligent agents for the Werewolf game. Professor Wu's agent first uses LLMs to infer potential deception and generate a series of strategically diverse action plans. Then, through group-based training, an RL strategy is learned to select an action from candidate actions, enhancing the agent’s decision-making ability. By combining LLMs with RL strategies, Professor Wu's agent produces diverse strategies, achieving the highest win rate when competing against other LLM-based agents, while maintaining robustness when playing against adversarial human players in the Werewolf game.\u003C/p>\u003Ch3 data-tool=\"mdnice编辑器\" style='margin-top: 30px;margin-bottom: 15px;font-weight: bold;font-size: 20px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;letter-spacing: normal;text-align: left;text-wrap: wrap;'>Framework\u003C/h3>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>This paper rarely considers the exploitability of their behaviors, tending to take actions with clear strategic patterns, making them easy for real human players to identify during competitions.\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>Professor Wu proposed a framework that combines large language models (LLMs) and reinforcement learning (RL) to build strategic language agents.\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>The agent uses LLMs to organize key information to infer hidden roles and generate a set of diverse action candidates. Then, through group-based training, an RL strategy is learned to produce final actions from the candidate actions, achieving strong strategic gameplay.\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>This LLM-based agent combined with reinforcement learning (RL), which has strategic thinking capabilities, is called a Strategic Language Agent.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100003555\" data-ratio=\"0.3768518518518518\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"./assets/17423813072190.2658755100835055.png\">\u003C/p>\u003Col class=\"list-paddingleft-1\" style=\"list-style-type: decimal;\">\u003Cli>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>Reasoning Judgment: Using LLMs to classify key information and apply reasoning judgment.\u003C/p>\u003C/li>\u003Cli>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>Diverse Action Generation: Prompting LLMs to provide a set of strategically diverse action candidates.\u003C/p>\u003C/li>\u003Cli>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>Group-based RL Training: Learning an RL strategy by playing against itself, its past versions, and a pool of agents.\u003C/p>\u003C/li>\u003C/ol>\u003Ch3 data-tool=\"mdnice编辑器\" style='margin-top: 30px;margin-bottom: 15px;font-weight: bold;font-size: 20px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;letter-spacing: normal;text-align: left;text-wrap: wrap;'>Experimental Results:\u003C/h3>\u003Col data-tool=\"mdnice编辑器\" class=\"list-paddingleft-1\" style='margin-top: 8px;margin-bottom: 8px;padding-left: 25px;width: 557.438px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;'>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">: Displays the win rate comparisons between different agents.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100003556\" data-ratio=\"1.0492307692307692\" data-s=\"300,640\" data-type=\"png\" data-w=\"650\" style=\"\" src=\"./assets/17423813072490.9363672435773769.png\">\u003C/p>\u003Cp>\u003Cbr>\u003C/p>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">: Bold numbers indicate that Professor Wu's agent is more robust than all weakened versions of the agents. Underlined numbers show that, in single-player evaluations, Professor Wu's agent achieved higher win rates than average human players.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100003557\" data-ratio=\"0.2851851851851852\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"./assets/17423813072280.1971872354613693.png\">\u003C/p>\u003Cp>\u003Cbr>\u003C/p>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">: Bold numbers indicate that Professor Wu's RL strategy improves the performance of agents built on unseen LLMs.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100003558\" data-ratio=\"0.2518518518518518\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"./assets/17423813072530.4475704056031362.png\">\u003C/p>\u003Cp>\u003Cbr>\u003C/p>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">: To intuitively demonstrate the benefits of RL training, the action distributions of agents with and without RL strategies are compared, and their behaviors are analyzed under three scenarios to showcase differences.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100003559\" data-ratio=\"0.4166666666666667\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"./assets/17423813082590.10194871886317558.png\">\u003C/p>\u003Cp>\u003Cbr>\u003C/p>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">\u003Cstrong>Comparison with Other Prompting Techniques\u003C/strong>：\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100003560\" data-ratio=\"0.2222222222222222\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"./assets/17423813078570.7430976792781085.png\">\u003C/p>\u003Cp>\u003Cbr>\u003C/p>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">\u003Cstrong>Comparison with Self-play\u003C/strong>：\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100003561\" data-ratio=\"0.2900763358778626\" data-s=\"300,640\" data-type=\"png\" data-w=\"1048\" style=\"\" src=\"./assets/17423813083880.3604208195578311.png\">\u003C/p>\u003C/section>\u003C/li>\u003C/ol>\u003Cp style=\"display: none;\">\u003Cmp-style-type data-value=\"3\">\u003C/mp-style-type>\u003C/p>\u003C/div>",[257,266,275,283,291,298,306,314],{"id":258,"title_md5":259,"publish_date":260,"author_md5":261,"is_original":4,"collection":262,"summary_md5":263,"cover_url":264,"cover_url_1_1":265},608,"f322be5606fcfbd40695c27abfc3328f","2022-03-09","7d83126d7919a9a379875f1ec38011de","#Psychology","c139c5c29b8c93e372a0d367cdc62321","article_res/cover/ef3a6ab91666d3c4d444fa898eb9b7bf.jpeg","article_res/cover/edf2d6b9d836a84920d8cc74b2fc7086.jpeg",{"id":267,"title_md5":268,"publish_date":269,"author_md5":270,"is_original":23,"collection":271,"summary_md5":272,"cover_url":273,"cover_url_1_1":274},320,"2c9845d0ea514ab97728b3b8ce02c0c0","2024-02-29","bc27fa490c4d0d525bac812fc0793534","#Google #AI Game","8526f59437288ceeb09b0cdc4cac85de","article_res/cover/19ab8d13e88c221abdb72ad8f88eda90.jpeg","article_res/cover/0c002aab590ab91f19503d06f320afe4.jpeg",{"id":276,"title_md5":277,"publish_date":278,"author_md5":279,"is_original":4,"collection":5,"summary_md5":280,"cover_url":281,"cover_url_1_1":282},546,"f920184fbbe46872e1c0eaba0c6198cb","2022-05-10","70537b1aee8f8d5429894e91ceae53b9","018edc9254af2f1fa4e13a6ecb5ceb96","article_res/cover/306c699f36a8cc19ff5d3b925cbb140e.jpeg","article_res/cover/d3a80000741cbeea17c008ad002b5613.jpeg",{"id":284,"title_md5":285,"publish_date":286,"author_md5":287,"is_original":4,"collection":5,"summary_md5":288,"cover_url":289,"cover_url_1_1":290},526,"e455bf05d763f483316038387a1dba36","2022-05-30","8b3607d0f4181a3cb6ffdccf7185f09b","1fd7171c05ccb597812d28c406effa91","article_res/cover/3ba9b9a46eb1482e63d2235ea7a702cf.jpeg","article_res/cover/29ecda84d373f784e9761d39c31f80eb.jpeg",{"id":292,"title_md5":293,"publish_date":294,"author_md5":287,"is_original":4,"collection":5,"summary_md5":295,"cover_url":296,"cover_url_1_1":297},571,"61bca3f1fd755515b5bfafda3f667c1e","2022-04-15","a4e05af28e9fbed3c1ff2bbe106c505d","article_res/cover/ae56bce26e77b0fd07649773ef93454c.jpeg","article_res/cover/14e0a9222470370a4d81fb8562f19069.jpeg",{"id":299,"title_md5":300,"publish_date":301,"author_md5":270,"is_original":23,"collection":302,"summary_md5":303,"cover_url":304,"cover_url_1_1":305},113,"9f504b7a56813ffa5fa9341d15fbc729","2024-12-02","#AI Code Generator","9a1b04d037898d1c1b26f79f86bd8a2c","article_res/cover/e4427f0045a692beb943b01951e35332.jpeg","article_res/cover/54ec911144ff879c075b530bdebd5c88.jpeg",{"id":307,"title_md5":308,"publish_date":309,"author_md5":270,"is_original":23,"collection":310,"summary_md5":311,"cover_url":312,"cover_url_1_1":313},236,"fb2902d49d48930fde5eb60bc4514657","2024-06-24","#AI Grant","2a78bd9e32d06e085d4f477fe978b9a4","article_res/cover/03a7cf88a6fad78e165f4e300b41a04c.jpeg","article_res/cover/0686015569acf3ce812cf206f4cf2906.jpeg",{"id":315,"title_md5":316,"publish_date":317,"author_md5":270,"is_original":4,"collection":5,"summary_md5":318,"cover_url":319,"cover_url_1_1":320},438,"7a0b09d1cf7dbaabb64afade625bf678","2023-07-02","9e36e4a6b9c61b5514379aca1c3c974f","article_res/cover/e46409b4a2b82038eefba36457b56b38.jpeg","article_res/cover/9439fb23cfe4a6806b6b59c00a01ae33.jpeg",{"related":322,"small":357},[323,324,332,341,349],{"id":192,"publish_date":193,"is_original":4,"collection":194,"cover_url":195,"cover_url_1_1":196,"title":197,"summary":198,"author":28},{"id":325,"publish_date":326,"is_original":23,"collection":327,"cover_url":328,"cover_url_1_1":329,"title":330,"summary":331,"author":28},246,"2024-06-14","#AI Game","article_res/cover/bb761d6a72b8800406032d2cfadc1eca.jpeg","article_res/cover/d9e1cb5455d969e3e1392af062a8d9c1.jpeg","Anti-Turing Test - AI NPCs","A group with the most advanced AIs in the world try to figure out who among them is the human",{"id":333,"publish_date":334,"is_original":4,"collection":335,"cover_url":336,"cover_url_1_1":337,"title":338,"summary":339,"author":340},476,"2023-04-18","#AI 3D Generator","article_res/cover/4bf87096649f9f42597ca3bec98185f1.jpeg","article_res/cover/596ca8d93d90fe4f40ee9eefd72a222b.jpeg","Various AIGC 3D tools (Part 1)","GPT creates realistic 3D models and renders of buildings based on sketches and specifications.  \n-","AIGC Learning Notes",{"id":342,"publish_date":343,"is_original":4,"collection":5,"cover_url":344,"cover_url_1_1":345,"title":346,"summary":347,"author":348},582,"2022-04-04","article_res/cover/773216ade9dc1d102c275d3eb44a12a0.jpeg","article_res/cover/b915bf644bcab2e1edce50560d9c6144.jpeg","Messari Report Translation and Summary 【Permanent (Risk) Capital: In, Up and Down, But Never Out】","The institutions are actually here this time.","Translation",{"id":350,"publish_date":351,"is_original":4,"collection":5,"cover_url":352,"cover_url_1_1":353,"title":354,"summary":355,"author":356},533,"2022-05-23","article_res/cover/3417fd6411336e06145ba11a7c42de79.jpeg","article_res/cover/ad8f05689ac820a914f888b79ebf20a7.jpeg","Stereotypical biases about gender","\"Each time a woman stands up for herself, she stands up for all women.\" –Maya Angelou","Essay",[358,364,370],{"title":10,"list":359},[360,361,362,363],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":365},[366,367,368,369],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},{"title":242,"list":371},[],[8,9,10],[8,12,13,14,9,10,15,16,17,18],["Reactive",245],1754646415278]