[{"data":1,"prerenderedAt":380},["ShallowReactive",2],{"$fgukOamtKU1RtUiMFsqdObttmqPPQz0uc7bl_gj_LyX0":3,"$f8m0RpV0v0QCvO_uX05Apu7aPD2R1pqoAPlpVNkdUsG0":245,"article-243":379},{"code":4,"msg":5,"data":6},0,"",{"category":7,"tag":11,"popular":19,"latest":86,"banner":126,"list":151,"cache":244},[8,9,10],"Agent","OpenAI","LLM",[8,12,13,14,9,10,15,16,17,18],"Google","Nvidia","Claude","DeepSeek","OCR","Chat","Generator",[20,29,37,45,54,62,70,79],{"id":21,"publish_date":22,"is_original":23,"collection":5,"cover_url":24,"cover_url_1_1":25,"title":26,"summary":27,"author":28},411,"2023-09-10",1,"article_res/cover/451ef50c225a8dc61c4336506794d13b.jpeg","article_res/cover/3ba9dc7a72f87d40b20fc2d225289ee3.jpeg","Idealism","Reality is created by the mind, we can change our reality by changing our mind. - Plato","Renee's Entrepreneurial Journey",{"id":30,"publish_date":31,"is_original":23,"collection":32,"cover_url":33,"cover_url_1_1":34,"title":35,"summary":36,"author":28},108,"2024-12-07","#LLM #AGI #AI Agent","article_res/cover/0039044422e4ec9f61c18e8ee1693bb0.jpeg","article_res/cover/4220971b108a91d21407d87bb02fbaa6.jpeg","Freysa.ai: The World's First Adversarial AI Agent Game","说服 Freysa 把钱包里的钱都拿出来",{"id":38,"publish_date":39,"is_original":23,"collection":40,"cover_url":41,"cover_url_1_1":42,"title":43,"summary":44,"author":28},12,"2025-03-09","#Oxford #Reasoning #LLM #Tool Use","article_res/cover/d448e9b3617a0b5302e1bd10c438bca9.jpeg","article_res/cover/864a468f9cc4c9317efadb3811909888.jpeg","Agentic Reasoning Framework - Significantly enhance the reasoning ability of LLMs through the integration of external tools using agents","Agentic Reasoning: Reasoning LLMs with Tools for Deep Research",{"id":46,"publish_date":47,"is_original":4,"collection":48,"cover_url":49,"cover_url_1_1":50,"title":51,"summary":52,"author":53},480,"2023-04-14","#Stable Diffusion","article_res/cover/0bdbe7cb1de4a78e54536e5d9afa7ec9.jpeg","article_res/cover/b3d6ffec0608dcfaf18c5a69906d1490.jpeg","【AIGC Learning】Generate Prompts Using Word Graphs - Stable Diffusion Web UI Series 13","AI will become a powerful tool in education, transforming the way we learn and deliver instruction.  \n- Reid Hoffman","--",{"id":55,"publish_date":56,"is_original":4,"collection":57,"cover_url":58,"cover_url_1_1":59,"title":60,"summary":61,"author":28},413,"2023-09-08","#Neuroscience","article_res/cover/74f8302d78a23d9430f22171eae136b6.jpeg","article_res/cover/87ca08af81bb304746be5261160964c0.jpeg","Can machines be conscious?","Do we have an ethical obligation to not turn off conscious machines? Would turning them off be murder? No. I don't lose any sleep over unplugging a conscious machine.\n- Jeff Hawkins, \"A Thousand Brains\"",{"id":63,"publish_date":64,"is_original":23,"collection":65,"cover_url":66,"cover_url_1_1":67,"title":68,"summary":69,"author":28},178,"2024-09-09","#Entrepreneurship","article_res/cover/a7224f025b55d1820408085faef63079.jpeg","article_res/cover/11a9995b096cbf64465ef01b8673b154.jpeg","37signals company","This damn sense of relaxation",{"id":71,"publish_date":72,"is_original":4,"collection":73,"cover_url":74,"cover_url_1_1":75,"title":76,"summary":77,"author":78},460,"2023-05-12","#Google","article_res/cover/b970687b12faa52da976f91248c2aa7b.jpeg","article_res/cover/d1e71b52cfd2c63bc6e71f3e85ff135c.jpeg","Learn what BRC-20 and Ordinals are using Google Bard","Ordinals - a new protocol that allows users to store arbitrary data on the Bitcoin blockchain","Google Bard mainly writes",{"id":80,"publish_date":81,"is_original":23,"collection":5,"cover_url":82,"cover_url_1_1":83,"title":84,"summary":85,"author":28},309,"2024-03-26","article_res/cover/9877f95894ee88532d0e6012c23a2df3.jpeg","article_res/cover/20092164ddc109ce6ae56b1984246751.jpeg","Learning the Cancun Upgrade with lepton and perplexity","Building a quick conversation-based search demo with Lepton AI.",[87,95,103,111,119],{"id":88,"publish_date":89,"is_original":23,"collection":90,"cover_url":91,"cover_url_1_1":92,"title":93,"summary":94,"author":28},627,"2025-03-20","#AI Avatar #AI Video Generation","article_res/cover/d95481358f73924989f8c4ee9c75d1c8.jpeg","article_res/cover/b74bc0fab01f8b6a6aa87696c0c3ed8b.jpeg","DisPose: Generating Animated Videos by Driving Video with Reference Images","DisPose is a controllable human image animation method that enhances video generation.",{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},626,"2025-03-21","#Deep Dive into LLMs #LLM #RL #Andrej Karpathy #AlphaGo","article_res/cover/446553a5c8f8f2f07d97b20eaee84e56.jpeg","article_res/cover/e6c2823409c9b34624064b9acbaca6f1.jpeg","AlphaGo and the Power of Reinforcement Learning - Andrej Karpathy's Deep Dive on LLMs (Part 9)","Simply learning from humans will never surpass human capabilities.",{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},625,"2025-03-22","#Deep Dive into LLMs #LLM #RL #RLHF #Andrej Karpathy","article_res/cover/8da81d38b1e5cf558a164710fd8a5389.jpeg","article_res/cover/96f028d76c362a99a0dd56389e8f7a9b.jpeg","Reinforcement Learning from Human Feedback (RLHF) - Andrej Karpathy's Deep Dive on LLMs (Part 10)","Fine-Tuning Language Models from Human Preferences",{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},624,"2025-03-23","#Deep Dive into LLMs #LLM #Andrej Karpathy #AI Agent #MMM","article_res/cover/a5e7c3d48bb09109684d6513287c661d.jpeg","article_res/cover/d3f22b7c0ab8d82fd2da457a299e0773.jpeg","The Future of Large Language Models - Andrej Karpathy's In-Depth Explanation of LLM (Part 11)","preview of things to come",{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},623,"#Google #Voe #AI Video Generation","article_res/cover/c44062fea0f336c2b96b3928292392c2.jpeg","article_res/cover/a041041c69092ad3db191c5bf3ff981b.jpeg","Trial of Google's video generation model VOE2","Our state-of-the-art video generation model",[127,135,143],{"id":128,"publish_date":129,"is_original":23,"collection":130,"cover_url":131,"cover_url_1_1":132,"title":133,"summary":134,"author":28},300,"2024-04-16","#AI in Science #AGI","article_res/cover/6bf01e793e0f33e848572412eebdf9b0.jpeg","article_res/cover/91a5ee21dafecb914fabeb9430d46ec1.jpeg","Would Einstein lose his job - AI and Quantum Computing: A Glimpse into the Near Future","So Einstein's job is still safe.",{"id":136,"publish_date":137,"is_original":23,"collection":138,"cover_url":139,"cover_url_1_1":140,"title":141,"summary":142,"author":28},101,"2024-12-14","#Nvidia #AI 3D Generator","article_res/cover/693e07c85980c5c0c8fde3f037733f23.jpeg","article_res/cover/9ea8edff2d5d303ff3fffff3f6f9c3d9.jpeg","NVIDIA's open-source 3D project LLaMA-Mesh","LLaMA-Mesh: Unifying 3D Mesh Generation with Language Models",{"id":144,"publish_date":145,"is_original":23,"collection":146,"cover_url":147,"cover_url_1_1":148,"title":149,"summary":150,"author":28},131,"2024-11-10","#OpenAI","article_res/cover/87f8ed353ce39f31960e7cdfaf075a35.jpeg","article_res/cover/f597a63935f5cd32e484b4aadd6019e8.jpeg","ChatGPT has launched the Search function","Get fast, timely answers with links to relevant web sources.",{"big":152,"small":214},[153,181],{"title":154,"list":155},"AGENT",[156,157,165,173],{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":158,"publish_date":159,"is_original":23,"collection":160,"cover_url":161,"cover_url_1_1":162,"title":163,"summary":164,"author":28},622,"2025-03-24","#OWL #AI Agent #MAS #MCP #CUA","article_res/cover/cb50ca7f2bf4d1ed50202d7406e1c19a.jpeg","article_res/cover/4aa7aa3badfacf3cc84121334f1050dd.jpeg","OWL: Multi-agent collaboration","OWL: Optimized Workforce Learning for General Multi-Agent Assistance in Real-World Task Automation",{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},620,"2025-03-26","#LLM #Google #Gemini #AI Agent","article_res/cover/53751a6dbbe990b1eb0b63f3b062aed4.jpeg","article_res/cover/031344981f0a212ff82d1f3a64aa5756.jpeg","Gemini 2.5 Pro, claimed to be far ahead of the competition, has been released with great fanfare: comprehensively surpassing other LLMs and topping the global rankings","Gemini 2.5: Our most intelligent AI model",{"id":174,"publish_date":175,"is_original":23,"collection":176,"cover_url":177,"cover_url_1_1":178,"title":179,"summary":180,"author":28},616,"2025-03-29","#MAS #AI Agent #AI Coder #MetaGPT #MGX","article_res/cover/9dcd702ad2035902e5e77967c34a1f1e.jpeg","article_res/cover/0a97fc4a922753c8f46ff38792020df8.jpeg","MGX - An automated website-building platform composed of multiple AI Agents","Your 24/7 AI Team | Dream, Chat, Create.",{"title":182,"list":183},"OPENAI",[184,191,199,206],{"id":185,"publish_date":167,"is_original":23,"collection":186,"cover_url":187,"cover_url_1_1":188,"title":189,"summary":190,"author":28},619,"#OpenAI #AI Image Generator #4o #MMM #AR Transformer","article_res/cover/2faffc97fcecf3151552cb0fd3206d89.jpeg","article_res/cover/1133cb4948af44cee2e7fbe79efb69e5.jpeg","The native image function of GPT-4o is officially launched","Introducing 4o Image Generation",{"id":192,"publish_date":193,"is_original":4,"collection":194,"cover_url":195,"cover_url_1_1":196,"title":197,"summary":198,"author":28},434,"2023-07-15","#Anthropic #OpenAI #Google #AI Code Generator #Claude","article_res/cover/e1b6f600a2b9f262a4392684e5f2ce25.jpeg","article_res/cover/6e1772e83f78f9a351ab23d3e414adee.jpeg","Latest Updates on Google Bard /Anthropic Claude2 / ChatGPT Code Interpreter","We want our models to use their programming skills to provide more natural interfaces to the basic functions of our computers.  \n - OpenAI",{"id":200,"publish_date":201,"is_original":4,"collection":146,"cover_url":202,"cover_url_1_1":203,"title":204,"summary":205,"author":28},417,"2023-08-24","article_res/cover/bccf897d50a88b18364e35f7466387e0.jpeg","article_res/cover/2f871085c1073717c1703ae86e18056f.jpeg","The GPT-3.5 Turbo fine-tuning (fine-tuning function) has been released～","Developers can now bring their own data to customize GPT-3.5 Turbo for their use cases.",{"id":207,"publish_date":208,"is_original":4,"collection":209,"cover_url":210,"cover_url_1_1":211,"title":212,"summary":213,"author":28},407,"2023-09-22","#OpenAI #AI Image Generator","article_res/cover/c59005e903d35cfc32346e2756e2728a.jpeg","article_res/cover/ba011d265e6d84b5c8cb6fd6b757b6cc.jpeg","Dall-E 3","DALL·E 3 understands significantly more nuance and detail, allowing you to easily translate your ideas into images.",[215,221,241],{"title":10,"list":216},[217,218,219,220],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":223},"GOOGLE",[224,225,226,234],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},615,"2025-03-30","#AI Researcher #AI Science #HKU #Google #AI Agent","article_res/cover/21fadf906067714bb0db31ae13a77c15.jpeg","article_res/cover/2697999a72bd26b22e85f0e92936d3ed.jpeg","AI-Researcher: LLM-driven全自动 scientific research assistant","AI-Researcher: Fully-Automated Scientific Discovery with LLM Agents  \nOpen-Sourced Alternative to Google AI Co-Scientist",{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},463,"2023-05-09","article_res/cover/89800f207723acdb55fc53bf999ebdc9.jpeg","article_res/cover/5764f369b4accd8f83e94aa4c077a175.jpeg","The Smallville sandbox world - A town with 25 virtual residents","Believable proxies of human behavior can empower interactive apps: Immersive environment, Rehearsal space, Prototyping tool",{"title":242,"list":243},"NVIDIA",[],true,{"code":4,"msg":5,"data":246},{"id":247,"publish_date":248,"is_original":23,"collection":249,"articles_id":250,"cover_url":251,"cover_url_1_1":252,"title":253,"summary":254,"author":28,"content":255,"popular":256,"list":321,"category":377,"tag":378},243,"2024-06-17","#AI Avatar #Tencent","BatF035W_k4cl7L2K23RWA","article_res/cover/18411086f214e950ca1ad80957d15d52.jpeg","article_res/cover/fa639bdf4a5ba7330027539519841b0c.jpeg","Two models from Tencent that make Avatars talk: V-Express and MuseTalk","V-Express aims to generate a talking head video under the control of a reference image and an audio.","\u003Cdiv class=\"rich_media_content js_underline_content\n                       autoTypeSetting24psection\n            \" id=\"js_content\">\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>Several technologies that enable Avatars to start speaking have been introduced before:\u003C/p>\u003Cul data-tool=\"mdnice编辑器\" class=\"list-paddingleft-1\" style='margin-top: 8px;margin-bottom: 8px;padding-left: 25px;width: 557.438px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;'>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Ca target=\"_blank\" href=\"https://mp.weixin.qq.com/s?__biz=MzkwOTMzMzk0MQ==&amp;mid=2247487690&amp;idx=1&amp;sn=b33cedc3278337d11a7743ccd3fe0852&amp;chksm=c13d1410f64a9d06f7e3fed6e18363d216086b3bfe423a30f41f7a6962b9db00a2b3dd2cde3d&amp;scene=21#wechat_redirect\" textvalue=\"AniPortrait\" linktype=\"text\" imgurl=\"\" imgdata=\"null\" data-itemshowtype=\"0\" tab=\"innerlink\" data-linktype=\"2\">AniPortrait\u003C/a>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Ca target=\"_blank\" href=\"https://mp.weixin.qq.com/s?__biz=MzkwOTMzMzk0MQ==&amp;mid=2247487736&amp;idx=1&amp;sn=12b99bfcf33982c06a9a356292d42f23&amp;chksm=c13d1422f64a9d34f4bf1de9bd647a3fb3e939fa8926f5c124b9b7163c79b34b5390f632e157&amp;scene=21#wechat_redirect\" textvalue=\"Rhubarb\" linktype=\"text\" imgurl=\"\" imgdata=\"null\" data-itemshowtype=\"0\" tab=\"innerlink\" data-linktype=\"2\">Rhubarb\u003C/a>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Ca target=\"_blank\" href=\"https://mp.weixin.qq.com/s?__biz=MzkwOTMzMzk0MQ==&amp;mid=2247488209&amp;idx=1&amp;sn=7553b6dcbe5ff825da1e11c6ff36c6b3&amp;chksm=c13d160bf64a9f1d76d59d3abc60ef4ec7b0c0433a5a0220f7ecf4700689cf1e17dd933418eb&amp;scene=21#wechat_redirect\" textvalue=\"Synthesia\" linktype=\"text\" imgurl=\"\" imgdata=\"null\" data-itemshowtype=\"0\" tab=\"innerlink\" data-linktype=\"2\">Synthesia\u003C/a>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Ca target=\"_blank\" href=\"https://mp.weixin.qq.com/s?__biz=MzkwOTMzMzk0MQ==&amp;mid=2247485313&amp;idx=1&amp;sn=b36e78cef530b058c5e79ba82e0462a3&amp;chksm=c13d035bf64a8a4d1081372e4f7cab990b6b3f4b9e1ddc71ffb5e017498c825d905b61571aa6&amp;scene=21#wechat_redirect\" textvalue=\"Sadtalker\" linktype=\"text\" imgurl=\"\" imgdata=\"null\" data-itemshowtype=\"0\" tab=\"innerlink\" data-linktype=\"2\">Sadtalker\u003C/a>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Ca target=\"_blank\" href=\"https://mp.weixin.qq.com/s?__biz=MzkwOTMzMzk0MQ==&amp;mid=2247486443&amp;idx=1&amp;sn=a6f1ca9fe096793f0e4062d5ba81ec25&amp;chksm=c13d0f31f64a8627553543c628d1c60fc319cb723dcc45a9f031ae08d2047bc18fa31fc10c64&amp;scene=21#wechat_redirect\" textvalue=\"GAIA\" linktype=\"text\" imgurl=\"\" imgdata=\"null\" data-itemshowtype=\"0\" tab=\"innerlink\" data-linktype=\"2\">GAIA\u003C/a>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Ca target=\"_blank\" href=\"https://mp.weixin.qq.com/s?__biz=MzkwOTMzMzk0MQ==&amp;mid=2247488924&amp;idx=1&amp;sn=7ca9212c451ab6fd3b6e6d126ee344e5&amp;chksm=c13d1146f64a985095d4305386d6cdac8950e34de5db6e4a9cab1b9e06b67af7b17f90199ba6&amp;scene=21#wechat_redirect\" textvalue=\"VASA\" linktype=\"text\" imgurl=\"\" imgdata=\"null\" data-itemshowtype=\"0\" tab=\"innerlink\" data-linktype=\"2\">VASA\u003C/a>\u003C/section>\u003C/li>\u003C/ul>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>Today, we will share two more from Tencent:\u003C/p>\u003Ch2 data-tool=\"mdnice编辑器\" style='margin-top: 30px;margin-bottom: 15px;font-weight: bold;font-size: 22px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;letter-spacing: normal;text-align: left;text-wrap: wrap;'>V-Express\u003C/h2>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>: V-Express aims to generate a talking-head video controlled by a reference image, audio, and a series of V-Kps images.\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>\u003Cstrong>Link\u003C/strong>：https://github.com/tencent-ailab/V-Express\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>: Tencent\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>\u003Cstrong>3 Scenarios\u003C/strong>：\u003C/p>\u003Col data-tool=\"mdnice编辑器\" class=\"list-paddingleft-1\" style='margin-top: 8px;margin-bottom: 8px;padding-left: 25px;width: 557.438px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;'>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">If there is a photo of A and another speaking video of A in a different scene, the model can generate a speaking video consistent with the given video.\u003C/p>\u003C/section>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423804007540.8777509486927204.mp4\" poster=\"./assets/17423804007560.7437984127054813.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">If there is only one photo and any speaking audio, the model can generate vivid mouth movements for a fixed face.\u003C/p>\u003C/section>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423804007560.05633817239393557.mp4\" poster=\"./assets/17423804007530.22316622094962302.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">\u003Cstrong>Scenario 3 (A's photo and B's speaking video.)\u003C/strong>\u003C/p>\u003C/section>\u003C/li>\u003C/ol>\u003Cul data-tool=\"mdnice编辑器\" class=\"list-paddingleft-1\" style='margin-top: 8px;margin-bottom: 8px;padding-left: 25px;width: 557.438px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;'>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">The model can generate vivid mouth movements for a fixed face.\u003C/p>\u003C/section>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423804007700.0959853718952135.mp4\" poster=\"./assets/17423804007490.47168764640939176.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">The model generates vivid mouth movements accompanied by slight facial movements.\u003C/p>\u003C/section>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423804011700.4548436188101359.mp4\" poster=\"./assets/17423804007760.7829596318983101.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">The model generates a video with the same actions as the target video, where the character's lip shapes are synchronized to match the target audio.\u003C/p>\u003C/section>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423804026890.3130494879381387.mp4\" poster=\"./assets/17423804013690.38075030800764287.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003C/li>\u003C/ul>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>\u003Cstrong>Model Architecture\u003C/strong>：\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100005047\" data-ratio=\"0.7814814814814814\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"./assets/17423804074340.461660206503159.png\">\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>The backbone of V-Express is a denoising U-Net that denoises multi-frame noisy latent variables under specific conditions. The architecture of this denoising U-Net is very similar to SDv1.5, with the main difference being four attention layers in each Transformer block instead of two. The first attention layer is a self-attention layer, just like in SDv1.5. The second and third attention layers are cross-attention layers. The second attention layer is called the reference attention layer, used to encode the relationship with the reference image. The third attention layer is called the audio attention layer, used to encode the relationship with the audio. These three attention layers are all spatial attention layers. Finally, the fourth attention layer is called the motion attention layer, which is a temporal self-attention layer used to capture the temporal relationships between video frames.\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>In addition, V-Express contains three key modules: ReferenceNet, V-Kps Guider, and Audio Projection, which are used respectively to encode the reference image, V-Kps images, and audio.\u003C/p>\u003Ch2 data-tool=\"mdnice编辑器\" style='margin-top: 30px;margin-bottom: 15px;font-weight: bold;font-size: 22px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;letter-spacing: normal;text-align: left;text-wrap: wrap;'>MuseTalk\u003C/h2>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>: MuseTalk is a real-time high-quality lip synchronization tool achieved through latent space patching.\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>\u003Cstrong>Link\u003C/strong>：https://github.com/TMElyralab/MuseTalk\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>: Tencent\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>\u003Cstrong>Scenario\u003C/strong>：\u003C/p>\u003Cul data-tool=\"mdnice编辑器\" class=\"list-paddingleft-1\" style='margin-top: 8px;margin-bottom: 8px;padding-left: 25px;width: 557.438px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;'>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cstrong style=\"color: black;\">MuseV + MuseTalk brings portrait photos to life!\u003C/strong>\u003C/section>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423804072710.9810704202095835.mp4\" poster=\"./assets/17423804010870.42886754738059585.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cstrong style=\"color: black;\">Video dubbing\u003C/strong>\u003C/section>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423804033800.5309565359210255.mp4\" poster=\"./assets/17423804014670.7851603818602892.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cstrong style=\"color: black;\">Some interesting videos!\u003C/strong>\u003C/section>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423804020980.9120016400706557.mp4\" poster=\"./assets/17423804013040.005410682803743017.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003C/li>\u003C/ul>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>\u003Cstrong>Model Architecture\u003C/strong>：\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100005048\" data-ratio=\"0.4546296296296296\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"1080\" style=\"\" src=\"./assets/17423804074310.465123053048877.jpeg\">\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>MuseTalk is trained in the latent space, where images are encoded by a frozen VAE and audio is encoded by a frozen whisper-tiny model. The architecture of the generative network draws inspiration from the UNet of stable-diffusion-v1-4, fusing audio embeddings into image embeddings through cross-attention. Although the architecture used by MuseTalk is very similar to Stable Diffusion, what makes MuseTalk unique is that it is not a diffusion model but operates through single-step patching in the latent space.\u003C/p>\u003Cp style=\"display: none;\">\u003Cmp-style-type data-value=\"3\">\u003C/mp-style-type>\u003C/p>\u003C/div>",[257,266,274,283,291,299,306,313],{"id":258,"title_md5":259,"publish_date":260,"author_md5":261,"is_original":4,"collection":262,"summary_md5":263,"cover_url":264,"cover_url_1_1":265},608,"f322be5606fcfbd40695c27abfc3328f","2022-03-09","7d83126d7919a9a379875f1ec38011de","#Psychology","c139c5c29b8c93e372a0d367cdc62321","article_res/cover/ef3a6ab91666d3c4d444fa898eb9b7bf.jpeg","article_res/cover/edf2d6b9d836a84920d8cc74b2fc7086.jpeg",{"id":267,"title_md5":268,"publish_date":269,"author_md5":270,"is_original":4,"collection":5,"summary_md5":271,"cover_url":272,"cover_url_1_1":273},495,"9b15ccc6efafbde434556a70af4440d0","2023-03-30","9f3428c4d2d88afac6a16510d115e41c","8c3a91662be7ddf873148e266df78686","article_res/cover/9cc1e0b26ea462d3277b0d98b9a74620.jpeg","article_res/cover/8ecf7bdc377a506ebe83640c883cdff7.jpeg",{"id":275,"title_md5":276,"publish_date":277,"author_md5":278,"is_original":23,"collection":279,"summary_md5":280,"cover_url":281,"cover_url_1_1":282},283,"21473d5e51d71b3f083e72fc954d9985","2024-05-05","bc27fa490c4d0d525bac812fc0793534","#AI Index Report 2024 #LLM #Alignment","4b1bd2a48f238b0d6dfd1552aeb648c2","article_res/cover/6182044281bbccd7ee6db9e3e4c8783c.jpeg","article_res/cover/baeb687f1c5504562e618281de151def.jpeg",{"id":284,"title_md5":285,"publish_date":286,"author_md5":287,"is_original":4,"collection":5,"summary_md5":288,"cover_url":289,"cover_url_1_1":290},584,"3bbbd2d5d6abaef308c0a7a487b8ba5e","2022-04-02","041a0f0d2be22c4bcabaaf966a63be32","1f34fc9fb6c7cc29d53761129dc2125b","article_res/cover/4cc95679033e72218723f1a31ea9c5ee.jpeg","article_res/cover/6c2ae10e20e702afb0f7ef93d0cac67f.jpeg",{"id":292,"title_md5":293,"publish_date":294,"author_md5":295,"is_original":4,"collection":5,"summary_md5":296,"cover_url":297,"cover_url_1_1":298},542,"785a4a691b99c7a0daff631debf28729","2022-05-14","8b3607d0f4181a3cb6ffdccf7185f09b","04a06e24c68cf714ffbc29a0076bd5ff","article_res/cover/0f2624d300aa00dbedaf2a8ebe9bea72.jpeg","article_res/cover/bf245440b515c5ef593634c23d01c8b3.jpeg",{"id":300,"title_md5":301,"publish_date":302,"author_md5":278,"is_original":4,"collection":5,"summary_md5":303,"cover_url":304,"cover_url_1_1":305},440,"2625737f4c4b641898eb12f03ddd434f","2023-06-24","3514c1dce56b91ffba23126867b67fef","article_res/cover/e5ac56e1640c601a518a9fb24370ec3e.jpeg","article_res/cover/659be0c95bfea0624423348bdd82434c.jpeg",{"id":307,"title_md5":308,"publish_date":309,"author_md5":278,"is_original":4,"collection":5,"summary_md5":310,"cover_url":311,"cover_url_1_1":312},373,"f825f76756a966c1ac434c35d3ba74a3","2023-12-12","148ba5a51415c325b25bab228cbdb41c","article_res/cover/ab378e476288da281f6b7d4dde1e1626.jpeg","article_res/cover/25df30becd71e4d0261d7e742ba6135e.jpeg",{"id":314,"title_md5":315,"publish_date":316,"author_md5":278,"is_original":4,"collection":317,"summary_md5":318,"cover_url":319,"cover_url_1_1":320},322,"b22fd15e98f2010f3be84f8df53517d0","2024-02-26","#OpenAI #Sora #World Model #MOE","4da005409a3e12e7eb103dbab651f461","article_res/cover/7983d0343a3030031c520f7c98d836f5.jpeg","article_res/cover/0846a27714dddadf46c49e2474764f74.jpeg",{"related":322,"small":362},[323,331,340,348,355],{"id":324,"publish_date":325,"is_original":23,"collection":326,"cover_url":327,"cover_url_1_1":328,"title":329,"summary":330,"author":28},330,"2024-02-09","#Entrepreneurship #Psychology","article_res/cover/afe58375102f83a77798fa3402db8cfe.jpeg","article_res/cover/b0237ae3f2790752bdf3f2b0860b4cc9.jpeg","The four quadrants of sudden sensation/sensitivity in the dimensions of events and human relationships.","Deduction relies on logic; induction relies on IQ. -- Congge's quotes",{"id":332,"publish_date":333,"is_original":4,"collection":334,"cover_url":335,"cover_url_1_1":336,"title":337,"summary":338,"author":339},155,"2024-10-10","#History of Intelligence #Neuroscience","article_res/cover/10e1e9838c22fb47444422912cf8e145.jpeg","article_res/cover/dcfca8f9c278e98010611cbd5e1d69df.jpeg","【A Brief History of Intelligence】6. Speaking Language (Human)","What makes us humans unique is that we possess a \"rational soul\" — the ability to reason, think abstractly, and reflect.  \n-Aristotle","Notes on \"A Brief History of Intelligence\"",{"id":341,"publish_date":342,"is_original":23,"collection":343,"cover_url":344,"cover_url_1_1":345,"title":346,"summary":347,"author":28},184,"2024-09-02","#History","article_res/cover/079330c1007f5b88ef5f6718c4e8f082.jpeg","article_res/cover/feb4549db6100e553b7d1b0178509bad.jpeg","Why did the First Industrial Revolution happen in Britain?","Set People Free. \n- Winston Churchill",{"id":349,"publish_date":350,"is_original":4,"collection":5,"cover_url":351,"cover_url_1_1":352,"title":353,"summary":354,"author":28},406,"2023-09-23","article_res/cover/bd67893c58b8e3f836d02761666b2f09.jpeg","article_res/cover/6e778293b0366ac4589f418643e10df2.jpeg","Erik Voorhees' speech at Permissionless II","The laws of physics, the laws of mathematics, the laws of code.",{"id":356,"publish_date":357,"is_original":4,"collection":334,"cover_url":358,"cover_url_1_1":359,"title":360,"summary":361,"author":339},157,"2024-10-07","article_res/cover/b574794646956494826447bc68fae09b.jpeg","article_res/cover/9f2351c7a240ea32a35128468cba14d7.jpeg","【A Brief History of Intelligence】5. Mentalizing (Primates)","Theory of mind, imitation learning, and anticipating future needs would have been adaptive in early primates.",[363,369,375],{"title":10,"list":364},[365,366,367,368],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":370},[371,372,373,374],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},{"title":242,"list":376},[],[8,9,10],[8,12,13,14,9,10,15,16,17,18],["Reactive",245],1754646417364]