[{"data":1,"prerenderedAt":379},["ShallowReactive",2],{"$fgukOamtKU1RtUiMFsqdObttmqPPQz0uc7bl_gj_LyX0":3,"$fZgmxrs0Z_y8qKsyU03KauwiahuyQr_DEgyfKB_g-c_I":245,"article-422":378},{"code":4,"msg":5,"data":6},0,"",{"category":7,"tag":11,"popular":19,"latest":86,"banner":126,"list":151,"cache":244},[8,9,10],"Agent","OpenAI","LLM",[8,12,13,14,9,10,15,16,17,18],"Google","Nvidia","Claude","DeepSeek","OCR","Chat","Generator",[20,29,37,45,54,62,70,79],{"id":21,"publish_date":22,"is_original":23,"collection":5,"cover_url":24,"cover_url_1_1":25,"title":26,"summary":27,"author":28},411,"2023-09-10",1,"article_res/cover/451ef50c225a8dc61c4336506794d13b.jpeg","article_res/cover/3ba9dc7a72f87d40b20fc2d225289ee3.jpeg","Idealism","Reality is created by the mind, we can change our reality by changing our mind. - Plato","Renee's Entrepreneurial Journey",{"id":30,"publish_date":31,"is_original":23,"collection":32,"cover_url":33,"cover_url_1_1":34,"title":35,"summary":36,"author":28},108,"2024-12-07","#LLM #AGI #AI Agent","article_res/cover/0039044422e4ec9f61c18e8ee1693bb0.jpeg","article_res/cover/4220971b108a91d21407d87bb02fbaa6.jpeg","Freysa.ai: The World's First Adversarial AI Agent Game","说服 Freysa 把钱包里的钱都拿出来",{"id":38,"publish_date":39,"is_original":23,"collection":40,"cover_url":41,"cover_url_1_1":42,"title":43,"summary":44,"author":28},12,"2025-03-09","#Oxford #Reasoning #LLM #Tool Use","article_res/cover/d448e9b3617a0b5302e1bd10c438bca9.jpeg","article_res/cover/864a468f9cc4c9317efadb3811909888.jpeg","Agentic Reasoning Framework - Significantly enhance the reasoning ability of LLMs through the integration of external tools using agents","Agentic Reasoning: Reasoning LLMs with Tools for Deep Research",{"id":46,"publish_date":47,"is_original":4,"collection":48,"cover_url":49,"cover_url_1_1":50,"title":51,"summary":52,"author":53},480,"2023-04-14","#Stable Diffusion","article_res/cover/0bdbe7cb1de4a78e54536e5d9afa7ec9.jpeg","article_res/cover/b3d6ffec0608dcfaf18c5a69906d1490.jpeg","【AIGC Learning】Generate Prompts Using Word Graphs - Stable Diffusion Web UI Series 13","AI will become a powerful tool in education, transforming the way we learn and deliver instruction.  \n- Reid Hoffman","--",{"id":55,"publish_date":56,"is_original":4,"collection":57,"cover_url":58,"cover_url_1_1":59,"title":60,"summary":61,"author":28},413,"2023-09-08","#Neuroscience","article_res/cover/74f8302d78a23d9430f22171eae136b6.jpeg","article_res/cover/87ca08af81bb304746be5261160964c0.jpeg","Can machines be conscious?","Do we have an ethical obligation to not turn off conscious machines? Would turning them off be murder? No. I don't lose any sleep over unplugging a conscious machine.\n- Jeff Hawkins, \"A Thousand Brains\"",{"id":63,"publish_date":64,"is_original":23,"collection":65,"cover_url":66,"cover_url_1_1":67,"title":68,"summary":69,"author":28},178,"2024-09-09","#Entrepreneurship","article_res/cover/a7224f025b55d1820408085faef63079.jpeg","article_res/cover/11a9995b096cbf64465ef01b8673b154.jpeg","37signals company","This damn sense of relaxation",{"id":71,"publish_date":72,"is_original":4,"collection":73,"cover_url":74,"cover_url_1_1":75,"title":76,"summary":77,"author":78},460,"2023-05-12","#Google","article_res/cover/b970687b12faa52da976f91248c2aa7b.jpeg","article_res/cover/d1e71b52cfd2c63bc6e71f3e85ff135c.jpeg","Learn what BRC-20 and Ordinals are using Google Bard","Ordinals - a new protocol that allows users to store arbitrary data on the Bitcoin blockchain","Google Bard mainly writes",{"id":80,"publish_date":81,"is_original":23,"collection":5,"cover_url":82,"cover_url_1_1":83,"title":84,"summary":85,"author":28},309,"2024-03-26","article_res/cover/9877f95894ee88532d0e6012c23a2df3.jpeg","article_res/cover/20092164ddc109ce6ae56b1984246751.jpeg","Learning the Cancun Upgrade with lepton and perplexity","Building a quick conversation-based search demo with Lepton AI.",[87,95,103,111,119],{"id":88,"publish_date":89,"is_original":23,"collection":90,"cover_url":91,"cover_url_1_1":92,"title":93,"summary":94,"author":28},627,"2025-03-20","#AI Avatar #AI Video Generation","article_res/cover/d95481358f73924989f8c4ee9c75d1c8.jpeg","article_res/cover/b74bc0fab01f8b6a6aa87696c0c3ed8b.jpeg","DisPose: Generating Animated Videos by Driving Video with Reference Images","DisPose is a controllable human image animation method that enhances video generation.",{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},626,"2025-03-21","#Deep Dive into LLMs #LLM #RL #Andrej Karpathy #AlphaGo","article_res/cover/446553a5c8f8f2f07d97b20eaee84e56.jpeg","article_res/cover/e6c2823409c9b34624064b9acbaca6f1.jpeg","AlphaGo and the Power of Reinforcement Learning - Andrej Karpathy's Deep Dive on LLMs (Part 9)","Simply learning from humans will never surpass human capabilities.",{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},625,"2025-03-22","#Deep Dive into LLMs #LLM #RL #RLHF #Andrej Karpathy","article_res/cover/8da81d38b1e5cf558a164710fd8a5389.jpeg","article_res/cover/96f028d76c362a99a0dd56389e8f7a9b.jpeg","Reinforcement Learning from Human Feedback (RLHF) - Andrej Karpathy's Deep Dive on LLMs (Part 10)","Fine-Tuning Language Models from Human Preferences",{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},624,"2025-03-23","#Deep Dive into LLMs #LLM #Andrej Karpathy #AI Agent #MMM","article_res/cover/a5e7c3d48bb09109684d6513287c661d.jpeg","article_res/cover/d3f22b7c0ab8d82fd2da457a299e0773.jpeg","The Future of Large Language Models - Andrej Karpathy's In-Depth Explanation of LLM (Part 11)","preview of things to come",{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},623,"#Google #Voe #AI Video Generation","article_res/cover/c44062fea0f336c2b96b3928292392c2.jpeg","article_res/cover/a041041c69092ad3db191c5bf3ff981b.jpeg","Trial of Google's video generation model VOE2","Our state-of-the-art video generation model",[127,135,143],{"id":128,"publish_date":129,"is_original":23,"collection":130,"cover_url":131,"cover_url_1_1":132,"title":133,"summary":134,"author":28},300,"2024-04-16","#AI in Science #AGI","article_res/cover/6bf01e793e0f33e848572412eebdf9b0.jpeg","article_res/cover/91a5ee21dafecb914fabeb9430d46ec1.jpeg","Would Einstein lose his job - AI and Quantum Computing: A Glimpse into the Near Future","So Einstein's job is still safe.",{"id":136,"publish_date":137,"is_original":23,"collection":138,"cover_url":139,"cover_url_1_1":140,"title":141,"summary":142,"author":28},101,"2024-12-14","#Nvidia #AI 3D Generator","article_res/cover/693e07c85980c5c0c8fde3f037733f23.jpeg","article_res/cover/9ea8edff2d5d303ff3fffff3f6f9c3d9.jpeg","NVIDIA's open-source 3D project LLaMA-Mesh","LLaMA-Mesh: Unifying 3D Mesh Generation with Language Models",{"id":144,"publish_date":145,"is_original":23,"collection":146,"cover_url":147,"cover_url_1_1":148,"title":149,"summary":150,"author":28},131,"2024-11-10","#OpenAI","article_res/cover/87f8ed353ce39f31960e7cdfaf075a35.jpeg","article_res/cover/f597a63935f5cd32e484b4aadd6019e8.jpeg","ChatGPT has launched the Search function","Get fast, timely answers with links to relevant web sources.",{"big":152,"small":214},[153,181],{"title":154,"list":155},"AGENT",[156,157,165,173],{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":158,"publish_date":159,"is_original":23,"collection":160,"cover_url":161,"cover_url_1_1":162,"title":163,"summary":164,"author":28},622,"2025-03-24","#OWL #AI Agent #MAS #MCP #CUA","article_res/cover/cb50ca7f2bf4d1ed50202d7406e1c19a.jpeg","article_res/cover/4aa7aa3badfacf3cc84121334f1050dd.jpeg","OWL: Multi-agent collaboration","OWL: Optimized Workforce Learning for General Multi-Agent Assistance in Real-World Task Automation",{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},620,"2025-03-26","#LLM #Google #Gemini #AI Agent","article_res/cover/53751a6dbbe990b1eb0b63f3b062aed4.jpeg","article_res/cover/031344981f0a212ff82d1f3a64aa5756.jpeg","Gemini 2.5 Pro, claimed to be far ahead of the competition, has been released with great fanfare: comprehensively surpassing other LLMs and topping the global rankings","Gemini 2.5: Our most intelligent AI model",{"id":174,"publish_date":175,"is_original":23,"collection":176,"cover_url":177,"cover_url_1_1":178,"title":179,"summary":180,"author":28},616,"2025-03-29","#MAS #AI Agent #AI Coder #MetaGPT #MGX","article_res/cover/9dcd702ad2035902e5e77967c34a1f1e.jpeg","article_res/cover/0a97fc4a922753c8f46ff38792020df8.jpeg","MGX - An automated website-building platform composed of multiple AI Agents","Your 24/7 AI Team | Dream, Chat, Create.",{"title":182,"list":183},"OPENAI",[184,191,199,206],{"id":185,"publish_date":167,"is_original":23,"collection":186,"cover_url":187,"cover_url_1_1":188,"title":189,"summary":190,"author":28},619,"#OpenAI #AI Image Generator #4o #MMM #AR Transformer","article_res/cover/2faffc97fcecf3151552cb0fd3206d89.jpeg","article_res/cover/1133cb4948af44cee2e7fbe79efb69e5.jpeg","The native image function of GPT-4o is officially launched","Introducing 4o Image Generation",{"id":192,"publish_date":193,"is_original":4,"collection":194,"cover_url":195,"cover_url_1_1":196,"title":197,"summary":198,"author":28},434,"2023-07-15","#Anthropic #OpenAI #Google #AI Code Generator #Claude","article_res/cover/e1b6f600a2b9f262a4392684e5f2ce25.jpeg","article_res/cover/6e1772e83f78f9a351ab23d3e414adee.jpeg","Latest Updates on Google Bard /Anthropic Claude2 / ChatGPT Code Interpreter","We want our models to use their programming skills to provide more natural interfaces to the basic functions of our computers.  \n - OpenAI",{"id":200,"publish_date":201,"is_original":4,"collection":146,"cover_url":202,"cover_url_1_1":203,"title":204,"summary":205,"author":28},417,"2023-08-24","article_res/cover/bccf897d50a88b18364e35f7466387e0.jpeg","article_res/cover/2f871085c1073717c1703ae86e18056f.jpeg","The GPT-3.5 Turbo fine-tuning (fine-tuning function) has been released～","Developers can now bring their own data to customize GPT-3.5 Turbo for their use cases.",{"id":207,"publish_date":208,"is_original":4,"collection":209,"cover_url":210,"cover_url_1_1":211,"title":212,"summary":213,"author":28},407,"2023-09-22","#OpenAI #AI Image Generator","article_res/cover/c59005e903d35cfc32346e2756e2728a.jpeg","article_res/cover/ba011d265e6d84b5c8cb6fd6b757b6cc.jpeg","Dall-E 3","DALL·E 3 understands significantly more nuance and detail, allowing you to easily translate your ideas into images.",[215,221,241],{"title":10,"list":216},[217,218,219,220],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":223},"GOOGLE",[224,225,226,234],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},615,"2025-03-30","#AI Researcher #AI Science #HKU #Google #AI Agent","article_res/cover/21fadf906067714bb0db31ae13a77c15.jpeg","article_res/cover/2697999a72bd26b22e85f0e92936d3ed.jpeg","AI-Researcher: LLM-driven全自动 scientific research assistant","AI-Researcher: Fully-Automated Scientific Discovery with LLM Agents  \nOpen-Sourced Alternative to Google AI Co-Scientist",{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},463,"2023-05-09","article_res/cover/89800f207723acdb55fc53bf999ebdc9.jpeg","article_res/cover/5764f369b4accd8f83e94aa4c077a175.jpeg","The Smallville sandbox world - A town with 25 virtual residents","Believable proxies of human behavior can empower interactive apps: Immersive environment, Rehearsal space, Prototyping tool",{"title":242,"list":243},"NVIDIA",[],true,{"code":4,"msg":5,"data":246},{"id":247,"publish_date":248,"is_original":4,"collection":249,"articles_id":250,"cover_url":251,"cover_url_1_1":252,"title":253,"summary":254,"author":53,"content":255,"popular":256,"list":319,"category":376,"tag":377},422,"2023-08-09","#LLM","wNL1CkpnEH8oq3_DATqA_g","article_res/cover/333388145626f84ced5762dba425a64d.jpeg","article_res/cover/27249377525a79a36fc10bd7c09ee374.jpeg","DPO vs RLHF","DPO is able to bypass the reward modeling phase and optimize directly for the preferences expressed in the preference data.","\u003Cdiv class=\"rich_media_content js_underline_content\n                       defaultNoSetting\n            \" id=\"js_content\">\u003Csection data-tool=\"markdown编辑器\" data-website=\"https://markdown.com.cn/editor\" style='font-size: 16px;font-style: normal;font-variant-caps: normal;font-weight: 400;letter-spacing: 0px;orphans: auto;text-indent: 0px;text-transform: none;white-space: normal;widows: auto;word-spacing: 0px;-webkit-tap-highlight-color: rgba(26, 26, 26, 0.3);-webkit-text-size-adjust: auto;-webkit-text-stroke-width: 0px;text-decoration: none;color: black;padding: 25px 30px;line-height: 1.6;word-break: break-word;overflow-wrap: break-word;text-align: justify;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;margin-top: -10px;'>\u003Cp data-tool=\"markdown.com.cn编辑器\" style=\"font-size: 16px;padding-top: 8px;padding-bottom: 8px;margin: 0px;line-height: 26px;color: black;\">is Direct Preference Optimization\u003C/p>\u003Cp data-tool=\"markdown.com.cn编辑器\" style=\"font-size: 16px;padding-top: 8px;padding-bottom: 8px;margin: 0px;line-height: 26px;color: black;\">is Reinforcement Learning from Human Feedback\u003C/p>\u003Ch2 data-tool=\"markdown.com.cn编辑器\" style=\"margin-top: 30px;margin-bottom: 15px;font-weight: bold;color: black;font-size: 22px;\">\u003Cspan class=\"content\">Let's first talk about RLHF\u003C/span>\u003Cspan class=\"suffix\">\u003C/span>\u003C/h2>\u003Cp data-tool=\"markdown.com.cn编辑器\" style=\"font-size: 16px;padding-top: 8px;padding-bottom: 8px;margin: 0px;line-height: 26px;color: black;\">OpenAI GPT relies on a new large language model (LLM) training paradigm: namely RLHF (Reinforcement Learning from Human Feedback). In short, it is a method of reinforcement learning optimization through human feedback.\u003C/p>\u003Cp data-tool=\"markdown.com.cn编辑器\" style=\"font-size: 16px;padding-top: 8px;padding-bottom: 8px;margin: 0px;line-height: 26px;color: black;\">Before this, LLMs mainly generated responses based on human input prompts (prompts), and such evaluations were typically subjective and context-dependent. Traditional models usually just predicted the next word and used simple loss functions (such as cross-entropy), without explicitly incorporating human preferences and subjective opinions.\u003C/p>\u003Cp data-tool=\"markdown.com.cn编辑器\" style=\"font-size: 16px;padding-top: 8px;padding-bottom: 8px;margin: 0px;line-height: 26px;color: black;\">Then RLHF was introduced. This strategy uses human feedback on generated texts as an evaluation criterion, and even incorporates this feedback into the loss function for optimizing the model. Simply put, it uses reinforcement learning methods to directly optimize a language model that takes human feedback into account. This ensures that the language model can better align with complex human values.\u003C/p>\u003Cp data-tool=\"markdown.com.cn编辑器\" style=\"font-size: 16px;padding-top: 8px;padding-bottom: 8px;margin: 0px;line-height: 26px;color: black;\">RLHF mainly consists of three steps:\u003C/p>\u003Col data-tool=\"markdown.com.cn编辑器\" style=\"margin-top: 8px;margin-bottom: 8px;padding-left: 25px;color: black;list-style-type: decimal;\">\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;text-align: left;color: rgb(1, 1, 1);font-weight: 500;\">Pre-training a language model (LM).\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;text-align: left;color: rgb(1, 1, 1);font-weight: 500;\">Aggregating question-and-answer data and using it to train a reward model (Reward Model, RM).\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;text-align: left;color: rgb(1, 1, 1);font-weight: 500;\">Fine-tuning the LM using reinforcement learning (RL).\u003C/section>\u003C/li>\u003C/ol>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;text-align: left;color: rgb(1, 1, 1);font-weight: 500;\">\u003Csection>\u003Csection style=\"display: inline-block;\">\u003Cimg data-ratio=\"0.3333333333333333\" data-type=\"jpg\" data-w=\"2085\" src=\"./assets/17434959630470.9399600891244193.jpeg\">\u003C/section>​\u003C/section>\u003C/section>\u003Ch2 data-tool=\"markdown.com.cn编辑器\" style=\"margin-top: 30px;margin-bottom: 15px;font-weight: bold;color: black;font-size: 22px;\">\u003Cspan class=\"content\">Let's talk about DPO.\u003C/span>\u003Cspan class=\"suffix\">\u003C/span>\u003C/h2>\u003Cp data-tool=\"markdown.com.cn编辑器\" style=\"font-size: 16px;padding-top: 8px;padding-bottom: 8px;margin: 0px;line-height: 26px;color: black;\">Although RLHF introduces the concept of human preferences and provides a method for integrating reinforcement learning with large language models, it often appears complex and unstable in practical applications. Its working principle is to first fit a reward model to capture human preferences, then fine-tune a large unsupervised learning model to maximize these rewards while trying to stay as close as possible to the original model.\u003C/p>\u003Cp data-tool=\"markdown.com.cn编辑器\" style=\"font-size: 16px;padding-top: 8px;padding-bottom: 8px;margin: 0px;line-height: 26px;color: black;\">To address these issues, researchers proposed the DPO algorithm. DPO not only directly uses the mapping between the reward function and the optimal strategy but also proves that the constrained reward maximization problem can be fully optimized through single-stage policy training. Essentially, DPO provides a solution to the classification problem based on human preference data.\u003C/p>\u003Csection>\u003Csection style=\"display: inline-block;\">\u003Cimg data-ratio=\"0.21357142857142858\" data-type=\"jpg\" data-w=\"1400\" src=\"./assets/17434959630430.8616027496594765.jpeg\">\u003C/section>​\u003C/section>\u003Cp data-tool=\"markdown.com.cn编辑器\" style=\"font-size: 16px;padding-top: 8px;padding-bottom: 8px;margin: 0px;line-height: 26px;color: black;\">Compared with RLHF, DPO has many advantages:\u003C/p>\u003Col data-tool=\"markdown.com.cn编辑器\" style=\"margin-top: 8px;margin-bottom: 8px;padding-left: 25px;color: black;list-style-type: decimal;\">\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;text-align: left;color: rgb(1, 1, 1);font-weight: 500;\">It offers higher stability and computational efficiency.\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;text-align: left;color: rgb(1, 1, 1);font-weight: 500;\">It does not require fitting a reward model or sampling during fine-tuning.\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;text-align: left;color: rgb(1, 1, 1);font-weight: 500;\">It reduces the reliance on a large number of hyperparameters.\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;text-align: left;color: rgb(1, 1, 1);font-weight: 500;\">DPO can more effectively fine-tune LMs to align with human preferences, often surpassing existing methods.\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;text-align: left;color: rgb(1, 1, 1);font-weight: 500;\">Fine-tuning with DPO performs better in controlling the sentiment of generated results, improving the quality of summaries and single-turn dialogue responses.\u003C/section>\u003C/li>\u003C/ol>\u003Cp data-tool=\"markdown.com.cn编辑器\" style=\"font-size: 16px;padding-top: 8px;padding-bottom: 8px;margin: 0px;line-height: 26px;color: black;\">You can view the detailed research paper on DPO at https://arxiv.org/abs/2305.18290\u003C/p>\u003Cp data-tool=\"markdown.com.cn编辑器\" style=\"font-size: 16px;padding-top: 8px;padding-bottom: 8px;margin: 0px;line-height: 26px;color: black;\">Here we can see the performance comparison between DPO and RLHF (PPO is a reinforcement learning algorithm under the RLHF framework):\u003C/p>\u003Csection>\u003Csection style=\"display: inline-block;\">\u003Cimg data-ratio=\"0.37969543147208124\" data-type=\"jpg\" data-w=\"985\" src=\"./assets/17434959636090.2335555212334901.jpeg\">\u003C/section>​\u003C/section>\u003Cp data-tool=\"markdown.com.cn编辑器\" style=\"font-size: 16px;padding-top: 8px;padding-bottom: 8px;margin: 0px;line-height: 26px;color: black;\">Reinforcement learning is a more difficult and unstable method, and so far only OpenAI and Anthropic have successfully implemented it. Many open-source models have not seen significant performance improvements after adopting RLHF. However, with the emergence of new methods like DPO, reinforcement learning is no longer the only option.\u003C/p>\u003C/section>\u003Cp style=\"display: none;\">\u003Cmp-style-type data-value=\"10000\">\u003C/mp-style-type>\u003C/p>\u003C/div>",[257,265,273,281,289,297,304,311],{"id":258,"title_md5":259,"publish_date":260,"author_md5":261,"is_original":23,"collection":5,"summary_md5":262,"cover_url":263,"cover_url_1_1":264},197,"a971baff48148b8ed9acc1ca7ab99c18","2024-08-14","bc27fa490c4d0d525bac812fc0793534","0eed6772828808601f389b4bac8f4d0a","article_res/cover/22174743c1501f73c6b18fe0c76eaba5.jpeg","article_res/cover/a4056722ad71dc449138929b900a70fd.jpeg",{"id":266,"title_md5":267,"publish_date":268,"author_md5":261,"is_original":23,"collection":269,"summary_md5":270,"cover_url":271,"cover_url_1_1":272},351,"be689590d76ec63d2a5fa799058ac7b4","2024-01-04","#Tencent #AI Agent","ef991913a10b83e4e85fec169f91f75b","article_res/cover/6cbc4e6da87f70a4653a0103da6fea4e.jpeg","article_res/cover/21a5401f37fda9c0a18f9c02caffba20.jpeg",{"id":274,"title_md5":275,"publish_date":276,"author_md5":261,"is_original":23,"collection":277,"summary_md5":278,"cover_url":279,"cover_url_1_1":280},224,"6750b90e233de5af6a744a3ec322b2c9","2024-07-09","#AI Avatar","aa36a9e66a4e0603e8124b0c52a669cf","article_res/cover/670fbb56c51f582e4b359a0b5c9eefd0.jpeg","article_res/cover/c9a84424a4c803f0e2565e847e9f36ac.jpeg",{"id":282,"title_md5":283,"publish_date":284,"author_md5":261,"is_original":23,"collection":285,"summary_md5":286,"cover_url":287,"cover_url_1_1":288},243,"e5ca21c7fa4d1349f626405b201e791a","2024-06-17","#AI Avatar #Tencent","fd100989176d762c4038f957d4bd8730","article_res/cover/18411086f214e950ca1ad80957d15d52.jpeg","article_res/cover/fa639bdf4a5ba7330027539519841b0c.jpeg",{"id":290,"title_md5":291,"publish_date":292,"author_md5":261,"is_original":23,"collection":293,"summary_md5":294,"cover_url":295,"cover_url_1_1":296},105,"47df25f2fa9c349c014a0927aed53f39","2024-12-10","#OpenAI #Sora #AI Video Generator","aa56a40ed0fcd4abde24bab99f72a8b2","article_res/cover/2669eb18ee8aaf134188583d5a3f58d7.jpeg","article_res/cover/d7f0cd0587c16a24176c21878cacbf22.jpeg",{"id":298,"title_md5":299,"publish_date":300,"author_md5":261,"is_original":23,"collection":65,"summary_md5":301,"cover_url":302,"cover_url_1_1":303},343,"3c0057d528592cb984384a8645b57009","2024-01-18","4a88772fa48babb8342b8f441745c82c","article_res/cover/25b7348465e89a554f7caa0ef8b4e443.jpeg","article_res/cover/e744744446f06238e135470f003b91d5.jpeg",{"id":305,"title_md5":306,"publish_date":307,"author_md5":261,"is_original":23,"collection":5,"summary_md5":308,"cover_url":309,"cover_url_1_1":310},412,"cbf43706fafce6a21828b18631d306ce","2023-09-09","1ea71279dd86997b72b064fd0085a0af","article_res/cover/2a7cc4836df072904a7c6587411fd1c8.jpeg","article_res/cover/6e99c94d589e69cff3566829cc098202.jpeg",{"id":312,"title_md5":313,"publish_date":314,"author_md5":261,"is_original":23,"collection":315,"summary_md5":316,"cover_url":317,"cover_url_1_1":318},136,"e9d61e351c9f5eb31f66c4d03e93c23a","2024-11-05","#AI Video Generator #AI Image Generator","211284e173d737bed036f64706586065","article_res/cover/86ff6ea34947c25e1cebdd34b261580e.jpeg","article_res/cover/4bcc2e0fd3a3e90d5565ff8b15e16d33.jpeg",{"related":320,"small":361},[321,329,337,345,353],{"id":322,"publish_date":323,"is_original":23,"collection":324,"cover_url":325,"cover_url_1_1":326,"title":327,"summary":328,"author":28},291,"2024-04-27","#AI 3D Generator","article_res/cover/da746312f3858a01f966e1c17ad1f9a3.jpeg","article_res/cover/53d099e12e83ee7f5c777e5320cbd173.jpeg","AI Generate 3D - AIUNI and Polycam trials","We believe that 3D capture is for everyone, so we made it easy. - Polycam",{"id":330,"publish_date":331,"is_original":23,"collection":332,"cover_url":333,"cover_url_1_1":334,"title":335,"summary":336,"author":28},311,"2024-03-24","#World Model","article_res/cover/24d0e50c27456407c35f246f65dece53.jpeg","article_res/cover/bacb8f187f43e13e0b37caef63ca999e.jpeg","Large World Model (LWM) - Berkeley's Large World Model","World Model on Million-Length Video and Language with Blockwise Ring Attention",{"id":338,"publish_date":339,"is_original":23,"collection":340,"cover_url":341,"cover_url_1_1":342,"title":343,"summary":344,"author":28},346,"2024-01-10","#Meta #AI Avatar","article_res/cover/d61bf1e67c7a47df3eba82ad12f33a59.jpeg","article_res/cover/13a49df63a291d41c384c18329d4099c.jpeg","Meta's Audio2Photoreal - From sound to virtual humans in motion.","From Audio to Photoreal Embodiment: Synthesizing Humans in Conversations",{"id":346,"publish_date":347,"is_original":23,"collection":348,"cover_url":349,"cover_url_1_1":350,"title":351,"summary":352,"author":28},252,"2024-06-08","#Alibaba","article_res/cover/480dfe8c509b19b5e8c3746f9659fd96.jpeg","article_res/cover/a6cdb22ad4c25c9727ce2a23a3374599.jpeg","Add text to images - AnyText","AnyText: Multilingual Visual Text Generation And Editing",{"id":354,"publish_date":355,"is_original":23,"collection":356,"cover_url":357,"cover_url_1_1":358,"title":359,"summary":360,"author":28},352,"2024-01-03","#AGI #AI Agent","article_res/cover/5a90dc2f33c6472e243ddf7ba925e2d1.jpeg","article_res/cover/6064536658bfbf1176aa0fa3c5fe640e.jpeg","AI Agent Application Market Map","Autonomous agents have long been a prominent research focus in both academic and industry communities",[362,368,374],{"title":10,"list":363},[364,365,366,367],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":369},[370,371,372,373],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},{"title":242,"list":375},[],[8,9,10],[8,12,13,14,9,10,15,16,17,18],["Reactive",245],1754646416917]