[{"data":1,"prerenderedAt":379},["ShallowReactive",2],{"$fgukOamtKU1RtUiMFsqdObttmqPPQz0uc7bl_gj_LyX0":3,"$f0b0UwPWKw5LJZ1FzVX42eGtCsCmxcEUmMLbyvGBd7dc":245,"article-141":378},{"code":4,"msg":5,"data":6},0,"",{"category":7,"tag":11,"popular":19,"latest":86,"banner":126,"list":151,"cache":244},[8,9,10],"Agent","OpenAI","LLM",[8,12,13,14,9,10,15,16,17,18],"Google","Nvidia","Claude","DeepSeek","OCR","Chat","Generator",[20,29,37,45,54,62,70,79],{"id":21,"publish_date":22,"is_original":23,"collection":5,"cover_url":24,"cover_url_1_1":25,"title":26,"summary":27,"author":28},411,"2023-09-10",1,"article_res/cover/451ef50c225a8dc61c4336506794d13b.jpeg","article_res/cover/3ba9dc7a72f87d40b20fc2d225289ee3.jpeg","Idealism","Reality is created by the mind, we can change our reality by changing our mind. - Plato","Renee's Entrepreneurial Journey",{"id":30,"publish_date":31,"is_original":23,"collection":32,"cover_url":33,"cover_url_1_1":34,"title":35,"summary":36,"author":28},108,"2024-12-07","#LLM #AGI #AI Agent","article_res/cover/0039044422e4ec9f61c18e8ee1693bb0.jpeg","article_res/cover/4220971b108a91d21407d87bb02fbaa6.jpeg","Freysa.ai: The World's First Adversarial AI Agent Game","说服 Freysa 把钱包里的钱都拿出来",{"id":38,"publish_date":39,"is_original":23,"collection":40,"cover_url":41,"cover_url_1_1":42,"title":43,"summary":44,"author":28},12,"2025-03-09","#Oxford #Reasoning #LLM #Tool Use","article_res/cover/d448e9b3617a0b5302e1bd10c438bca9.jpeg","article_res/cover/864a468f9cc4c9317efadb3811909888.jpeg","Agentic Reasoning Framework - Significantly enhance the reasoning ability of LLMs through the integration of external tools using agents","Agentic Reasoning: Reasoning LLMs with Tools for Deep Research",{"id":46,"publish_date":47,"is_original":4,"collection":48,"cover_url":49,"cover_url_1_1":50,"title":51,"summary":52,"author":53},480,"2023-04-14","#Stable Diffusion","article_res/cover/0bdbe7cb1de4a78e54536e5d9afa7ec9.jpeg","article_res/cover/b3d6ffec0608dcfaf18c5a69906d1490.jpeg","【AIGC Learning】Generate Prompts Using Word Graphs - Stable Diffusion Web UI Series 13","AI will become a powerful tool in education, transforming the way we learn and deliver instruction.  \n- Reid Hoffman","--",{"id":55,"publish_date":56,"is_original":4,"collection":57,"cover_url":58,"cover_url_1_1":59,"title":60,"summary":61,"author":28},413,"2023-09-08","#Neuroscience","article_res/cover/74f8302d78a23d9430f22171eae136b6.jpeg","article_res/cover/87ca08af81bb304746be5261160964c0.jpeg","Can machines be conscious?","Do we have an ethical obligation to not turn off conscious machines? Would turning them off be murder? No. 
I don't lose any sleep over unplugging a conscious machine.\n- Jeff Hawkins, \"A Thousand Brains\"",{"id":63,"publish_date":64,"is_original":23,"collection":65,"cover_url":66,"cover_url_1_1":67,"title":68,"summary":69,"author":28},178,"2024-09-09","#Entrepreneurship","article_res/cover/a7224f025b55d1820408085faef63079.jpeg","article_res/cover/11a9995b096cbf64465ef01b8673b154.jpeg","37signals company","This damn sense of relaxation",{"id":71,"publish_date":72,"is_original":4,"collection":73,"cover_url":74,"cover_url_1_1":75,"title":76,"summary":77,"author":78},460,"2023-05-12","#Google","article_res/cover/b970687b12faa52da976f91248c2aa7b.jpeg","article_res/cover/d1e71b52cfd2c63bc6e71f3e85ff135c.jpeg","Learn what BRC-20 and Ordinals are using Google Bard","Ordinals - a new protocol that allows users to store arbitrary data on the Bitcoin blockchain","Google Bard mainly writes",{"id":80,"publish_date":81,"is_original":23,"collection":5,"cover_url":82,"cover_url_1_1":83,"title":84,"summary":85,"author":28},309,"2024-03-26","article_res/cover/9877f95894ee88532d0e6012c23a2df3.jpeg","article_res/cover/20092164ddc109ce6ae56b1984246751.jpeg","Learning the Cancun Upgrade with lepton and perplexity","Building a quick conversation-based search demo with Lepton AI.",[87,95,103,111,119],{"id":88,"publish_date":89,"is_original":23,"collection":90,"cover_url":91,"cover_url_1_1":92,"title":93,"summary":94,"author":28},627,"2025-03-20","#AI Avatar #AI Video Generation","article_res/cover/d95481358f73924989f8c4ee9c75d1c8.jpeg","article_res/cover/b74bc0fab01f8b6a6aa87696c0c3ed8b.jpeg","DisPose: Generating Animated Videos by Driving Video with Reference Images","DisPose is a controllable human image animation method that enhances video generation.",{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},626,"2025-03-21","#Deep Dive into LLMs #LLM #RL #Andrej Karpathy #AlphaGo","article_res/cover/446553a5c8f8f2f07d97b20eaee84e56.jpeg","article_res/cover/e6c2823409c9b34624064b9acbaca6f1.jpeg","AlphaGo and the Power of Reinforcement Learning - Andrej Karpathy's Deep Dive on LLMs (Part 9)","Simply learning from humans will never surpass human capabilities.",{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},625,"2025-03-22","#Deep Dive into LLMs #LLM #RL #RLHF #Andrej Karpathy","article_res/cover/8da81d38b1e5cf558a164710fd8a5389.jpeg","article_res/cover/96f028d76c362a99a0dd56389e8f7a9b.jpeg","Reinforcement Learning from Human Feedback (RLHF) - Andrej Karpathy's Deep Dive on LLMs (Part 10)","Fine-Tuning Language Models from Human Preferences",{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},624,"2025-03-23","#Deep Dive into LLMs #LLM #Andrej Karpathy #AI Agent #MMM","article_res/cover/a5e7c3d48bb09109684d6513287c661d.jpeg","article_res/cover/d3f22b7c0ab8d82fd2da457a299e0773.jpeg","The Future of Large Language Models - Andrej Karpathy's In-Depth Explanation of LLM (Part 11)","preview of things to come",{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},623,"#Google #Voe #AI Video Generation","article_res/cover/c44062fea0f336c2b96b3928292392c2.jpeg","article_res/cover/a041041c69092ad3db191c5bf3ff981b.jpeg","Trial of Google's video generation model VOE2","Our state-of-the-art video 
generation model",[127,135,143],{"id":128,"publish_date":129,"is_original":23,"collection":130,"cover_url":131,"cover_url_1_1":132,"title":133,"summary":134,"author":28},300,"2024-04-16","#AI in Science #AGI","article_res/cover/6bf01e793e0f33e848572412eebdf9b0.jpeg","article_res/cover/91a5ee21dafecb914fabeb9430d46ec1.jpeg","Would Einstein lose his job - AI and Quantum Computing: A Glimpse into the Near Future","So Einstein's job is still safe.",{"id":136,"publish_date":137,"is_original":23,"collection":138,"cover_url":139,"cover_url_1_1":140,"title":141,"summary":142,"author":28},101,"2024-12-14","#Nvidia #AI 3D Generator","article_res/cover/693e07c85980c5c0c8fde3f037733f23.jpeg","article_res/cover/9ea8edff2d5d303ff3fffff3f6f9c3d9.jpeg","NVIDIA's open-source 3D project LLaMA-Mesh","LLaMA-Mesh: Unifying 3D Mesh Generation with Language Models",{"id":144,"publish_date":145,"is_original":23,"collection":146,"cover_url":147,"cover_url_1_1":148,"title":149,"summary":150,"author":28},131,"2024-11-10","#OpenAI","article_res/cover/87f8ed353ce39f31960e7cdfaf075a35.jpeg","article_res/cover/f597a63935f5cd32e484b4aadd6019e8.jpeg","ChatGPT has launched the Search function","Get fast, timely answers with links to relevant web sources.",{"big":152,"small":214},[153,181],{"title":154,"list":155},"AGENT",[156,157,165,173],{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":158,"publish_date":159,"is_original":23,"collection":160,"cover_url":161,"cover_url_1_1":162,"title":163,"summary":164,"author":28},622,"2025-03-24","#OWL #AI Agent #MAS #MCP #CUA","article_res/cover/cb50ca7f2bf4d1ed50202d7406e1c19a.jpeg","article_res/cover/4aa7aa3badfacf3cc84121334f1050dd.jpeg","OWL: Multi-agent collaboration","OWL: Optimized Workforce Learning for General Multi-Agent Assistance in Real-World Task Automation",{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},620,"2025-03-26","#LLM #Google #Gemini #AI Agent","article_res/cover/53751a6dbbe990b1eb0b63f3b062aed4.jpeg","article_res/cover/031344981f0a212ff82d1f3a64aa5756.jpeg","Gemini 2.5 Pro, claimed to be far ahead of the competition, has been released with great fanfare: comprehensively surpassing other LLMs and topping the global rankings","Gemini 2.5: Our most intelligent AI model",{"id":174,"publish_date":175,"is_original":23,"collection":176,"cover_url":177,"cover_url_1_1":178,"title":179,"summary":180,"author":28},616,"2025-03-29","#MAS #AI Agent #AI Coder #MetaGPT #MGX","article_res/cover/9dcd702ad2035902e5e77967c34a1f1e.jpeg","article_res/cover/0a97fc4a922753c8f46ff38792020df8.jpeg","MGX - An automated website-building platform composed of multiple AI Agents","Your 24/7 AI Team | Dream, Chat, Create.",{"title":182,"list":183},"OPENAI",[184,191,199,206],{"id":185,"publish_date":167,"is_original":23,"collection":186,"cover_url":187,"cover_url_1_1":188,"title":189,"summary":190,"author":28},619,"#OpenAI #AI Image Generator #4o #MMM #AR Transformer","article_res/cover/2faffc97fcecf3151552cb0fd3206d89.jpeg","article_res/cover/1133cb4948af44cee2e7fbe79efb69e5.jpeg","The native image function of GPT-4o is officially launched","Introducing 4o Image Generation",{"id":192,"publish_date":193,"is_original":4,"collection":194,"cover_url":195,"cover_url_1_1":196,"title":197,"summary":198,"author":28},434,"2023-07-15","#Anthropic #OpenAI #Google #AI Code Generator 
#Claude","article_res/cover/e1b6f600a2b9f262a4392684e5f2ce25.jpeg","article_res/cover/6e1772e83f78f9a351ab23d3e414adee.jpeg","Latest Updates on Google Bard /Anthropic Claude2 / ChatGPT Code Interpreter","We want our models to use their programming skills to provide more natural interfaces to the basic functions of our computers.  \n - OpenAI",{"id":200,"publish_date":201,"is_original":4,"collection":146,"cover_url":202,"cover_url_1_1":203,"title":204,"summary":205,"author":28},417,"2023-08-24","article_res/cover/bccf897d50a88b18364e35f7466387e0.jpeg","article_res/cover/2f871085c1073717c1703ae86e18056f.jpeg","The GPT-3.5 Turbo fine-tuning (fine-tuning function) has been released～","Developers can now bring their own data to customize GPT-3.5 Turbo for their use cases.",{"id":207,"publish_date":208,"is_original":4,"collection":209,"cover_url":210,"cover_url_1_1":211,"title":212,"summary":213,"author":28},407,"2023-09-22","#OpenAI #AI Image Generator","article_res/cover/c59005e903d35cfc32346e2756e2728a.jpeg","article_res/cover/ba011d265e6d84b5c8cb6fd6b757b6cc.jpeg","Dall-E 3","DALL·E 3 understands significantly more nuance and detail, allowing you to easily translate your ideas into images.",[215,221,241],{"title":10,"list":216},[217,218,219,220],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":223},"GOOGLE",[224,225,226,234],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},615,"2025-03-30","#AI Researcher #AI Science #HKU #Google #AI Agent","article_res/cover/21fadf906067714bb0db31ae13a77c15.jpeg","article_res/cover/2697999a72bd26b22e85f0e92936d3ed.jpeg","AI-Researcher: LLM-driven全自动 scientific research assistant","AI-Researcher: Fully-Automated Scientific Discovery with LLM Agents  \nOpen-Sourced Alternative to Google AI Co-Scientist",{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},463,"2023-05-09","article_res/cover/89800f207723acdb55fc53bf999ebdc9.jpeg","article_res/cover/5764f369b4accd8f83e94aa4c077a175.jpeg","The Smallville sandbox world - A town with 25 virtual residents","Believable proxies of human behavior can empower interactive apps: Immersive environment, Rehearsal space, Prototyping tool",{"title":242,"list":243},"NVIDIA",[],true,{"code":4,"msg":5,"data":246},{"id":247,"publish_date":248,"is_original":23,"collection":249,"articles_id":250,"cover_url":251,"cover_url_1_1":252,"title":253,"summary":254,"author":28,"content":255,"popular":256,"list":320,"category":376,"tag":377},141,"2024-10-28","#AI Video 
Generator","Fi_m6TGHTkNz62qcY_PtUg","article_res/cover/b61ffaa8fe3bda9386441383659cc1ee.jpeg","article_res/cover/dd4c01118caf3107ba8ed34ebe27a8c1.jpeg","Mochi 1: Open-source video generation model","Mochi 1: A new SOTA in open-source video generation models","\u003Cdiv class=\"rich_media_content js_underline_content\n                       autoTypeSetting24psection\n            \" id=\"js_content\">\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>Mochi 1 Preview is an open-source video generation model released under the Apache 2.0 license. Mochi 1 can produce high-fidelity motion effects and achieve strong prompt responsiveness, significantly narrowing the gap between open-source and closed video generation systems.\u003C/p>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423781332300.16370505745067687.mp4\" poster=\"./assets/17423781330100.09343521245378628.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);cursor: pointer;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);'>\u003Cspan style=\"cursor: pointer;font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">Team Introduction\u003C/span>\u003C/h3>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>The core members of Genmo's team come from projects such as DDPM (denoising diffusion probabilistic models), DreamFusion, and Emu Video. Genmo’s technical advisory team also consists of top industry experts, including Ion Stoica, co-founder and executive chairman of Databricks and Anyscale, Pieter Abbeel, co-founder of Covariant and early member of OpenAI, and Joey Gonzalez, a pioneer in language model systems and co-founder of Turi.\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>Funding: Raised $28.4 million in Series A funding led by NEA with Rick Yang as the lead investor. Participating institutions include The House Fund, Gold House Ventures, WndrCo, Eastlink Capital Partners, Essence VC, and angel investors such as Abhay Parasnis (CEO of Typespace), Amjad Masad (CEO of Replit), Sabrina Hahn, Bonita Stewart, and Michele Catasta.\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>Genmo's mission is to unlock the right-brain potential of artificial general intelligence. 
Mochi 1 is an important starting point for building world simulators that can emulate everything, whether real or fictional.

### Model Evaluation

There is still a significant gap between today's video generation models and reality. Motion quality and prompt responsiveness are two key capabilities that video generation models have yet to fully master.

Mochi 1 sets a new benchmark for open-source video generation and is highly competitive with leading closed models:

- Prompt responsiveness: Mochi 1 responds precisely to text prompts, generating videos that accurately reflect the instructions and giving users detailed control over characters, scenes, and actions. Prompt responsiveness is scored with automated metrics based on vision-language models, following the OpenAI DALL-E 3 protocol, with the generated videos judged by Gemini-1.5-Pro-002.

  [Figure: prompt-responsiveness benchmark comparison]

- Motion quality: Mochi 1 generates smooth 5.4-second videos at 30 frames per second, with high temporal consistency and realistic motion dynamics. The model can simulate physical phenomena such as fluid dynamics and hair and fur motion, rendering consistently fluid human movement that begins to cross the uncanny valley. Evaluators are asked to judge motion rather than single-frame aesthetics (criteria include interestingness of motion, physical plausibility, and smoothness), and Elo scores are computed following the LMSYS Chatbot Arena protocol (a minimal sketch follows this list).

  [Figure: motion-quality Elo comparison]
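To make the Elo reference concrete, here is a minimal sketch of online Elo updating over pairwise votes, in the spirit of the LMSYS Chatbot Arena protocol. The K-factor, initial rating, and vote format are illustrative assumptions (arena implementations may differ, e.g. by fitting a Bradley-Terry model instead):

```python
# Minimal Elo scoring over pairwise comparisons. K-factor, initial
# rating, and the vote format are illustrative assumptions, not the
# exact arena implementation.
from collections import defaultdict

def expected_score(r_a: float, r_b: float) -> float:
    """Probability that A beats B under the Elo model."""
    return 1.0 / (1.0 + 10 ** ((r_b - r_a) / 400))

def elo_ratings(votes, k: float = 32.0, init: float = 1000.0):
    """votes: iterable of (model_a, model_b, winner), winner in {'a', 'b'}."""
    ratings = defaultdict(lambda: init)
    for a, b, winner in votes:
        e_a = expected_score(ratings[a], ratings[b])
        s_a = 1.0 if winner == "a" else 0.0
        ratings[a] += k * (s_a - e_a)
        ratings[b] += k * ((1.0 - s_a) - (1.0 - e_a))
    return dict(ratings)

votes = [("mochi-1", "model-x", "a"), ("model-x", "mochi-1", "b")]
print(elo_ratings(votes))  # winner of each pairing gains rating
```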
### Trial Use

The currently released version is the base 480p edition. By the end of this year, the team will release the full version of Mochi 1, including Mochi 1 HD. Mochi 1 HD will support 720p video generation with higher detail fidelity and smoother motion, handling edge cases such as image distortion in complex scenes more gracefully.

- Download the model: https://github.com/genmoai/models
- Official website: https://www.genmo.ai/play

I tried it myself, and the results were not great. My guess is that once the open-source community gets involved, optimizations will improve its performance :)
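For a quick local trial, one possible path (an assumption on my part; the official genmoai/models repo ships its own inference scripts) is the Hugging Face diffusers integration. The prompt, frame count, and step count below are illustrative:

```python
# Hypothetical quick-start via the diffusers MochiPipeline integration;
# treat this as one possible path, not the official inference script.
# A large-memory GPU is needed even with offloading.
import torch
from diffusers import MochiPipeline
from diffusers.utils import export_to_video

pipe = MochiPipeline.from_pretrained("genmo/mochi-1-preview",
                                     torch_dtype=torch.bfloat16)
pipe.enable_model_cpu_offload()  # trade speed for lower VRAM use

frames = pipe(
    "a close-up of fur blowing in the wind, photorealistic",
    num_frames=85,             # ~2.8 s at 30 fps; the model targets 5.4 s clips
    num_inference_steps=64,
).frames[0]
export_to_video(frames, "mochi_sample.mp4", fps=30)
```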
### Limitations

As a research preview, Mochi 1 is a dynamically evolving checkpoint with some known limitations. The initial version supports 480p video generation and may exhibit slight image distortion and artifacts in edge cases involving extreme motion. Because Mochi 1 is optimized mainly for realistic styles, it performs poorly when generating animated-style content. The team also anticipates that the community will fine-tune the model to meet different aesthetic needs.

### Model Architecture

Mochi 1 marks significant progress in open-source video generation, adopting a 10-billion-parameter diffusion model built on the novel Asymmetric Diffusion Transformer (AsymmDiT) architecture. Trained from scratch, it is currently the largest open-source video generation model. More importantly, the architecture is simple and easy to modify.

Efficiency is critical so that the community can actually run Mochi 1. To this end, the team has also open-sourced the video VAE, which causally compresses video data by 128 times, with 8x8 spatial and 6x temporal compression into a 12-channel latent space.
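To make the compression figures concrete, here is the latent-shape arithmetic as a sketch. Only the 8x8x6 compression and the 12 latent channels come from the article; the 848x480 base resolution, the 163-frame clip (~5.4 s at 30 fps), and the 2x2 patchification are assumptions:

```python
# Worked latent-shape arithmetic for the Mochi 1 VAE, under assumptions:
# an 848x480 clip of 163 frames and 2x2 patchification in the DiT.
frames, height, width = 163, 480, 848

t = -(-frames // 6)              # temporal 6x -> 28 latent frames (ceil div)
h, w = height // 8, width // 8   # spatial 8x8 -> 60 x 106 latent grid
latent_ch = 12

print((t, latent_ch, h, w))      # latent tensor: (28, 12, 60, 106)

# 2x2 patches over the latent grid become the DiT's "video tokens":
tokens = t * (h // 2) * (w // 2)
print(tokens)                    # 28 * 30 * 53 = 44,520 tokens, matching
                                 # the context length cited below
```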
AsymmDiT simplifies text processing and concentrates compute on visual reasoning, efficiently handling user prompts and compressed video tokens together. It uses multimodal self-attention to jointly attend over text and visual tokens, with independent MLP layers learned per modality, similar to Stable Diffusion 3. However, Mochi 1 allocates nearly four times as many parameters to the visual stream as to the text stream, processing visual information at a larger hidden dimension; non-square QKV and output projection layers unify the modalities inside the self-attention operation. This asymmetric design effectively reduces memory requirements during inference.

Many modern diffusion models use multiple pre-trained language models to process user prompts, but Mochi 1 encodes prompts through a single T5-XXL language model.
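A minimal sketch of the asymmetric joint-attention idea follows. All dimensions are illustrative assumptions; only the design (a wide visual stream, a narrow text stream, shared joint attention, per-modality MLPs, non-square projections on the narrow side) is from the article:

```python
# Sketch of an asymmetric joint self-attention block in the spirit of
# AsymmDiT. Dimensions are illustrative, not Mochi 1's configuration;
# with these sizes the text-side QKV/output projections are the
# non-square ones (768 <-> 3072).
import torch
import torch.nn as nn
import torch.nn.functional as F

class AsymmetricJointAttention(nn.Module):
    def __init__(self, d_vis=3072, d_txt=768, d_attn=3072, n_heads=24):
        super().__init__()
        self.n_heads, self.d_head = n_heads, d_attn // n_heads
        self.qkv_vis = nn.Linear(d_vis, 3 * d_attn)
        self.qkv_txt = nn.Linear(d_txt, 3 * d_attn)   # non-square
        self.out_vis = nn.Linear(d_attn, d_vis)
        self.out_txt = nn.Linear(d_attn, d_txt)       # non-square
        # Independent MLPs per modality (as in Stable Diffusion 3).
        self.mlp_vis = nn.Sequential(nn.Linear(d_vis, 4 * d_vis), nn.GELU(),
                                     nn.Linear(4 * d_vis, d_vis))
        self.mlp_txt = nn.Sequential(nn.Linear(d_txt, 4 * d_txt), nn.GELU(),
                                     nn.Linear(4 * d_txt, d_txt))

    def _heads(self, x):  # (B, N, d_attn) -> (B, heads, N, d_head)
        b, n, _ = x.shape
        return x.view(b, n, self.n_heads, self.d_head).transpose(1, 2)

    def forward(self, vis, txt):
        n_vis = vis.shape[1]
        q_v, k_v, v_v = self.qkv_vis(vis).chunk(3, dim=-1)
        q_t, k_t, v_t = self.qkv_txt(txt).chunk(3, dim=-1)
        # Joint attention over the concatenated visual+text sequence.
        q = self._heads(torch.cat([q_v, q_t], dim=1))
        k = self._heads(torch.cat([k_v, k_t], dim=1))
        v = self._heads(torch.cat([v_v, v_t], dim=1))
        out = F.scaled_dot_product_attention(q, k, v)
        out = out.transpose(1, 2).reshape(q.shape[0], -1,
                                          self.n_heads * self.d_head)
        # Route each modality back to its own width via output projections.
        vis = vis + self.out_vis(out[:, :n_vis])
        txt = txt + self.out_txt(out[:, n_vis:])
        return vis + self.mlp_vis(vis), txt + self.mlp_txt(txt)

block = AsymmetricJointAttention()
v, t = torch.randn(1, 128, 3072), torch.randn(1, 16, 768)
v2, t2 = block(v, t)
print(v2.shape, t2.shape)  # (1, 128, 3072) (1, 16, 768)
```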
Diffusion","104e237dd393f5178bb076f615695d5c","article_res/cover/8040b312d8f106087fc24aa8a5d086a1.jpeg","article_res/cover/bf19f7b2d7c128248d583862bad96e55.jpeg",{"id":289,"title_md5":290,"publish_date":291,"author_md5":261,"is_original":23,"collection":292,"summary_md5":293,"cover_url":294,"cover_url_1_1":295},46,"d89b72d178b80d754069b4e0b16683ed","2025-02-07","#Bytedance #AI Avatar #AI Video Generator","506910d5a17ab6dee1ae6e322841d330","article_res/cover/5000b160051edfc2e45d5a5b93640159.jpeg","article_res/cover/80696be2904b86eb303ad4dbd6dbdbec.jpeg",{"id":297,"title_md5":298,"publish_date":299,"author_md5":261,"is_original":23,"collection":300,"summary_md5":301,"cover_url":302,"cover_url_1_1":303},98,"e33c7190eea3c5235f83a96da09ce9a8","2024-12-17","#AI Video Generator #Sora #Pika","1224923af65fd20370dc02e30ec1397f","article_res/cover/3b86e85d03fff4f356a3e4cf2bb329c9.jpeg","article_res/cover/5fa5c20ad0b40f8f544d257c0ef02938.jpeg",{"id":305,"title_md5":306,"publish_date":307,"author_md5":308,"is_original":4,"collection":5,"summary_md5":309,"cover_url":310,"cover_url_1_1":311},610,"dfb3144dba26c51be29fbf39ad7e3b9d","2022-03-07","4afa1f337c03df0a2e3454c8e9799662","271ede7e7f7532d11d6ab7058a213ebb","article_res/cover/e0d64dda34b8acc71583ce0ffdfcb430.jpeg","article_res/cover/5e763f310a559b989f929e0cee5d4b37.jpeg",{"id":313,"title_md5":314,"publish_date":315,"author_md5":261,"is_original":23,"collection":316,"summary_md5":317,"cover_url":318,"cover_url_1_1":319},96,"2606299aa85b82a155dcf95efdfb7847","2024-12-19","#OpenAI #LLM #ChatGPT","3bccabb72efefc849d584040de6806ec","article_res/cover/5ade9b60d58aa520d9a28561c64d8c5d.jpeg","article_res/cover/ddfa142e8b7f1e655226194a9bddf230.jpeg",{"related":321,"small":361},[322,330,338,346,354],{"id":323,"publish_date":324,"is_original":23,"collection":325,"cover_url":326,"cover_url_1_1":327,"title":328,"summary":329,"author":28},224,"2024-07-09","#AI Avatar","article_res/cover/670fbb56c51f582e4b359a0b5c9eefd0.jpeg","article_res/cover/c9a84424a4c803f0e2565e847e9f36ac.jpeg","Kuaishou's LivePortrait - A Video-driven Avatar Animation Framework","LivePortrait: Efficient Portrait Animation with Stitching and Retargeting Control",{"id":331,"publish_date":332,"is_original":4,"collection":5,"cover_url":333,"cover_url_1_1":334,"title":335,"summary":336,"author":337},541,"2022-05-15","article_res/cover/e961f7a10d80fdeccf3e946f8a32f197.jpeg","article_res/cover/94c33e16b6a938364a0df4b962a0cf16.jpeg","Decentralized Society: Finding Web3’s Soul / 去中心化社会：寻找Web3的灵魂 -2","A person might have a Soul that stores SBTs representing educational credentials, employment history, or works of art.","Translation",{"id":339,"publish_date":340,"is_original":23,"collection":341,"cover_url":342,"cover_url_1_1":343,"title":344,"summary":345,"author":28},36,"2025-02-17","#Baidu #AI Avatar","article_res/cover/2f0a10000836de9daa5902903690697c.jpeg","article_res/cover/19bcf2b18f917a8e7e375cb69a00687d.jpeg","Baidu's Hallo3 - Voice-driven portrait image animation","Hallo3: Highly Dynamic and Realistic Portrait Image Animation with Diffusion Transformer Networks",{"id":347,"publish_date":348,"is_original":4,"collection":5,"cover_url":349,"cover_url_1_1":350,"title":351,"summary":352,"author":353},507,"2022-07-31","article_res/cover/2bb15a23eaad2f90ffe5a1dd2362c2f3.jpeg","article_res/cover/511f84be021e2f205dcbbc1b2829fcb5.jpeg","General Thinking Tools","In a world where people will certainly make mistakes no matter how careful they are, a little relaxation seems more beneficial to our health 
Mochi 1 also benefits from some of the latest advances in language-model scaling, including SwiGLU feedforward layers, query-key normalization for improved stability, and sandwich normalization for controlling internal activations.
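For reference, minimal sketches of these three tricks, under illustrative dimensions (only the technique names come from the article):

```python
# SwiGLU feedforward, query-key normalization, and sandwich
# normalization, each in minimal form with illustrative sizes.
import torch
import torch.nn as nn
import torch.nn.functional as F

class SwiGLU(nn.Module):
    def __init__(self, d=3072, d_ff=8192):
        super().__init__()
        self.gate = nn.Linear(d, d_ff, bias=False)
        self.up = nn.Linear(d, d_ff, bias=False)
        self.down = nn.Linear(d_ff, d, bias=False)

    def forward(self, x):
        # Gated feedforward: silu(gate(x)) elementwise-scales up(x).
        return self.down(F.silu(self.gate(x)) * self.up(x))

def qk_norm(q, k, eps=1e-6):
    # Normalize queries and keys before attention for numerical stability.
    return F.normalize(q, dim=-1, eps=eps), F.normalize(k, dim=-1, eps=eps)

class SandwichBlock(nn.Module):
    # "Sandwich" normalization: normalize both the input and the output
    # of the sublayer to bound internal activations.
    def __init__(self, d=3072):
        super().__init__()
        self.pre, self.post = nn.LayerNorm(d), nn.LayerNorm(d)
        self.ff = SwiGLU(d)

    def forward(self, x):
        return x + self.post(self.ff(self.pre(x)))

print(SandwichBlock()(torch.randn(1, 8, 3072)).shape)  # (1, 8, 3072)
```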