[{"data":1,"prerenderedAt":377},["ShallowReactive",2],{"$fgukOamtKU1RtUiMFsqdObttmqPPQz0uc7bl_gj_LyX0":3,"$fqYC5H0LFcmmaWsWkTrHePGVTD-ZYHp5SxIYOx7NGD0I":245,"article-5":376},{"code":4,"msg":5,"data":6},0,"",{"category":7,"tag":11,"popular":19,"latest":86,"banner":126,"list":151,"cache":244},[8,9,10],"Agent","OpenAI","LLM",[8,12,13,14,9,10,15,16,17,18],"Google","Nvidia","Claude","DeepSeek","OCR","Chat","Generator",[20,29,37,45,54,62,70,79],{"id":21,"publish_date":22,"is_original":23,"collection":5,"cover_url":24,"cover_url_1_1":25,"title":26,"summary":27,"author":28},411,"2023-09-10",1,"article_res/cover/451ef50c225a8dc61c4336506794d13b.jpeg","article_res/cover/3ba9dc7a72f87d40b20fc2d225289ee3.jpeg","Idealism","Reality is created by the mind, we can change our reality by changing our mind. - Plato","Renee's Entrepreneurial Journey",{"id":30,"publish_date":31,"is_original":23,"collection":32,"cover_url":33,"cover_url_1_1":34,"title":35,"summary":36,"author":28},108,"2024-12-07","#LLM #AGI #AI Agent","article_res/cover/0039044422e4ec9f61c18e8ee1693bb0.jpeg","article_res/cover/4220971b108a91d21407d87bb02fbaa6.jpeg","Freysa.ai: The World's First Adversarial AI Agent Game","说服 Freysa 把钱包里的钱都拿出来",{"id":38,"publish_date":39,"is_original":23,"collection":40,"cover_url":41,"cover_url_1_1":42,"title":43,"summary":44,"author":28},12,"2025-03-09","#Oxford #Reasoning #LLM #Tool Use","article_res/cover/d448e9b3617a0b5302e1bd10c438bca9.jpeg","article_res/cover/864a468f9cc4c9317efadb3811909888.jpeg","Agentic Reasoning Framework - Significantly enhance the reasoning ability of LLMs through the integration of external tools using agents","Agentic Reasoning: Reasoning LLMs with Tools for Deep Research",{"id":46,"publish_date":47,"is_original":4,"collection":48,"cover_url":49,"cover_url_1_1":50,"title":51,"summary":52,"author":53},480,"2023-04-14","#Stable Diffusion","article_res/cover/0bdbe7cb1de4a78e54536e5d9afa7ec9.jpeg","article_res/cover/b3d6ffec0608dcfaf18c5a69906d1490.jpeg","【AIGC Learning】Generate Prompts Using Word Graphs - Stable Diffusion Web UI Series 13","AI will become a powerful tool in education, transforming the way we learn and deliver instruction.  \n- Reid Hoffman","--",{"id":55,"publish_date":56,"is_original":4,"collection":57,"cover_url":58,"cover_url_1_1":59,"title":60,"summary":61,"author":28},413,"2023-09-08","#Neuroscience","article_res/cover/74f8302d78a23d9430f22171eae136b6.jpeg","article_res/cover/87ca08af81bb304746be5261160964c0.jpeg","Can machines be conscious?","Do we have an ethical obligation to not turn off conscious machines? Would turning them off be murder? No. I don't lose any sleep over unplugging a conscious machine.\n- Jeff Hawkins, \"A Thousand Brains\"",{"id":63,"publish_date":64,"is_original":23,"collection":65,"cover_url":66,"cover_url_1_1":67,"title":68,"summary":69,"author":28},178,"2024-09-09","#Entrepreneurship","article_res/cover/a7224f025b55d1820408085faef63079.jpeg","article_res/cover/11a9995b096cbf64465ef01b8673b154.jpeg","37signals company","This damn sense of relaxation",{"id":71,"publish_date":72,"is_original":4,"collection":73,"cover_url":74,"cover_url_1_1":75,"title":76,"summary":77,"author":78},460,"2023-05-12","#Google","article_res/cover/b970687b12faa52da976f91248c2aa7b.jpeg","article_res/cover/d1e71b52cfd2c63bc6e71f3e85ff135c.jpeg","Learn what BRC-20 and Ordinals are using Google Bard","Ordinals - a new protocol that allows users to store arbitrary data on the Bitcoin blockchain","Google Bard mainly writes",{"id":80,"publish_date":81,"is_original":23,"collection":5,"cover_url":82,"cover_url_1_1":83,"title":84,"summary":85,"author":28},309,"2024-03-26","article_res/cover/9877f95894ee88532d0e6012c23a2df3.jpeg","article_res/cover/20092164ddc109ce6ae56b1984246751.jpeg","Learning the Cancun Upgrade with lepton and perplexity","Building a quick conversation-based search demo with Lepton AI.",[87,95,103,111,119],{"id":88,"publish_date":89,"is_original":23,"collection":90,"cover_url":91,"cover_url_1_1":92,"title":93,"summary":94,"author":28},627,"2025-03-20","#AI Avatar #AI Video Generation","article_res/cover/d95481358f73924989f8c4ee9c75d1c8.jpeg","article_res/cover/b74bc0fab01f8b6a6aa87696c0c3ed8b.jpeg","DisPose: Generating Animated Videos by Driving Video with Reference Images","DisPose is a controllable human image animation method that enhances video generation.",{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},626,"2025-03-21","#Deep Dive into LLMs #LLM #RL #Andrej Karpathy #AlphaGo","article_res/cover/446553a5c8f8f2f07d97b20eaee84e56.jpeg","article_res/cover/e6c2823409c9b34624064b9acbaca6f1.jpeg","AlphaGo and the Power of Reinforcement Learning - Andrej Karpathy's Deep Dive on LLMs (Part 9)","Simply learning from humans will never surpass human capabilities.",{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},625,"2025-03-22","#Deep Dive into LLMs #LLM #RL #RLHF #Andrej Karpathy","article_res/cover/8da81d38b1e5cf558a164710fd8a5389.jpeg","article_res/cover/96f028d76c362a99a0dd56389e8f7a9b.jpeg","Reinforcement Learning from Human Feedback (RLHF) - Andrej Karpathy's Deep Dive on LLMs (Part 10)","Fine-Tuning Language Models from Human Preferences",{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},624,"2025-03-23","#Deep Dive into LLMs #LLM #Andrej Karpathy #AI Agent #MMM","article_res/cover/a5e7c3d48bb09109684d6513287c661d.jpeg","article_res/cover/d3f22b7c0ab8d82fd2da457a299e0773.jpeg","The Future of Large Language Models - Andrej Karpathy's In-Depth Explanation of LLM (Part 11)","preview of things to come",{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},623,"#Google #Voe #AI Video Generation","article_res/cover/c44062fea0f336c2b96b3928292392c2.jpeg","article_res/cover/a041041c69092ad3db191c5bf3ff981b.jpeg","Trial of Google's video generation model VOE2","Our state-of-the-art video generation model",[127,135,143],{"id":128,"publish_date":129,"is_original":23,"collection":130,"cover_url":131,"cover_url_1_1":132,"title":133,"summary":134,"author":28},300,"2024-04-16","#AI in Science #AGI","article_res/cover/6bf01e793e0f33e848572412eebdf9b0.jpeg","article_res/cover/91a5ee21dafecb914fabeb9430d46ec1.jpeg","Would Einstein lose his job - AI and Quantum Computing: A Glimpse into the Near Future","So Einstein's job is still safe.",{"id":136,"publish_date":137,"is_original":23,"collection":138,"cover_url":139,"cover_url_1_1":140,"title":141,"summary":142,"author":28},101,"2024-12-14","#Nvidia #AI 3D Generator","article_res/cover/693e07c85980c5c0c8fde3f037733f23.jpeg","article_res/cover/9ea8edff2d5d303ff3fffff3f6f9c3d9.jpeg","NVIDIA's open-source 3D project LLaMA-Mesh","LLaMA-Mesh: Unifying 3D Mesh Generation with Language Models",{"id":144,"publish_date":145,"is_original":23,"collection":146,"cover_url":147,"cover_url_1_1":148,"title":149,"summary":150,"author":28},131,"2024-11-10","#OpenAI","article_res/cover/87f8ed353ce39f31960e7cdfaf075a35.jpeg","article_res/cover/f597a63935f5cd32e484b4aadd6019e8.jpeg","ChatGPT has launched the Search function","Get fast, timely answers with links to relevant web sources.",{"big":152,"small":214},[153,181],{"title":154,"list":155},"AGENT",[156,157,165,173],{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":158,"publish_date":159,"is_original":23,"collection":160,"cover_url":161,"cover_url_1_1":162,"title":163,"summary":164,"author":28},622,"2025-03-24","#OWL #AI Agent #MAS #MCP #CUA","article_res/cover/cb50ca7f2bf4d1ed50202d7406e1c19a.jpeg","article_res/cover/4aa7aa3badfacf3cc84121334f1050dd.jpeg","OWL: Multi-agent collaboration","OWL: Optimized Workforce Learning for General Multi-Agent Assistance in Real-World Task Automation",{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},620,"2025-03-26","#LLM #Google #Gemini #AI Agent","article_res/cover/53751a6dbbe990b1eb0b63f3b062aed4.jpeg","article_res/cover/031344981f0a212ff82d1f3a64aa5756.jpeg","Gemini 2.5 Pro, claimed to be far ahead of the competition, has been released with great fanfare: comprehensively surpassing other LLMs and topping the global rankings","Gemini 2.5: Our most intelligent AI model",{"id":174,"publish_date":175,"is_original":23,"collection":176,"cover_url":177,"cover_url_1_1":178,"title":179,"summary":180,"author":28},616,"2025-03-29","#MAS #AI Agent #AI Coder #MetaGPT #MGX","article_res/cover/9dcd702ad2035902e5e77967c34a1f1e.jpeg","article_res/cover/0a97fc4a922753c8f46ff38792020df8.jpeg","MGX - An automated website-building platform composed of multiple AI Agents","Your 24/7 AI Team | Dream, Chat, Create.",{"title":182,"list":183},"OPENAI",[184,191,199,206],{"id":185,"publish_date":167,"is_original":23,"collection":186,"cover_url":187,"cover_url_1_1":188,"title":189,"summary":190,"author":28},619,"#OpenAI #AI Image Generator #4o #MMM #AR Transformer","article_res/cover/2faffc97fcecf3151552cb0fd3206d89.jpeg","article_res/cover/1133cb4948af44cee2e7fbe79efb69e5.jpeg","The native image function of GPT-4o is officially launched","Introducing 4o Image Generation",{"id":192,"publish_date":193,"is_original":4,"collection":194,"cover_url":195,"cover_url_1_1":196,"title":197,"summary":198,"author":28},434,"2023-07-15","#Anthropic #OpenAI #Google #AI Code Generator #Claude","article_res/cover/e1b6f600a2b9f262a4392684e5f2ce25.jpeg","article_res/cover/6e1772e83f78f9a351ab23d3e414adee.jpeg","Latest Updates on Google Bard /Anthropic Claude2 / ChatGPT Code Interpreter","We want our models to use their programming skills to provide more natural interfaces to the basic functions of our computers.  \n - OpenAI",{"id":200,"publish_date":201,"is_original":4,"collection":146,"cover_url":202,"cover_url_1_1":203,"title":204,"summary":205,"author":28},417,"2023-08-24","article_res/cover/bccf897d50a88b18364e35f7466387e0.jpeg","article_res/cover/2f871085c1073717c1703ae86e18056f.jpeg","The GPT-3.5 Turbo fine-tuning (fine-tuning function) has been released～","Developers can now bring their own data to customize GPT-3.5 Turbo for their use cases.",{"id":207,"publish_date":208,"is_original":4,"collection":209,"cover_url":210,"cover_url_1_1":211,"title":212,"summary":213,"author":28},407,"2023-09-22","#OpenAI #AI Image Generator","article_res/cover/c59005e903d35cfc32346e2756e2728a.jpeg","article_res/cover/ba011d265e6d84b5c8cb6fd6b757b6cc.jpeg","Dall-E 3","DALL·E 3 understands significantly more nuance and detail, allowing you to easily translate your ideas into images.",[215,221,241],{"title":10,"list":216},[217,218,219,220],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":223},"GOOGLE",[224,225,226,234],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},615,"2025-03-30","#AI Researcher #AI Science #HKU #Google #AI Agent","article_res/cover/21fadf906067714bb0db31ae13a77c15.jpeg","article_res/cover/2697999a72bd26b22e85f0e92936d3ed.jpeg","AI-Researcher: LLM-driven全自动 scientific research assistant","AI-Researcher: Fully-Automated Scientific Discovery with LLM Agents  \nOpen-Sourced Alternative to Google AI Co-Scientist",{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},463,"2023-05-09","article_res/cover/89800f207723acdb55fc53bf999ebdc9.jpeg","article_res/cover/5764f369b4accd8f83e94aa4c077a175.jpeg","The Smallville sandbox world - A town with 25 virtual residents","Believable proxies of human behavior can empower interactive apps: Immersive environment, Rehearsal space, Prototyping tool",{"title":242,"list":243},"NVIDIA",[],true,{"code":4,"msg":5,"data":246},{"id":247,"publish_date":248,"is_original":23,"collection":249,"articles_id":250,"cover_url":251,"cover_url_1_1":252,"title":253,"summary":254,"author":28,"content":255,"popular":256,"list":318,"category":374,"tag":375},5,"2025-03-15","#Meta #AI Video Generator","h1AImgpfZbzwtb2KMdqOzQ","article_res/cover/6d289c936dcfa91f777ce3f553b01382.jpeg","article_res/cover/49d1007ac1b4cc08a3fa19c40455d915.jpeg","Meta's latest released video generation model VideoJAM","VideoJAM: Joint Appearance-Motion Representations for Enhanced Motion Generation in Video Models","\u003Cdiv class=\"rich_media_content js_underline_content\n                       autoTypeSetting24psection\n            \" id=\"js_content\">\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">Previously, there were trials using Pika and Sora to generate gymnastics videos, but each gymnast ended up with a bizarre phenomenon of having \"three heads and six arms.\" This distortion reflects a common issue in current video generation models — although the visual effects are impressive, there are still shortcomings in terms of motion coherence and authenticity.\u003C/span>\u003C/p>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">Recently, Meta launched\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">VideoJAM\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">a model specifically aimed at addressing this problem. According to Meta's published paper and demonstrations (which are currently not yet available for use), the VideoJAM model no longer focuses solely on pixel-level quality but introduces \"joint appearance-motion representation\" to ensure the naturalness and coherence of actions in generated videos.\u003C/span>\u003C/p>\u003Csection nodeleaf=\"\">\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423768842220.14244454740782864.mp4\" poster=\"./assets/17423768831280.7035614417956331.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">Notably, VideoJAM can be applied to any existing video generation model without requiring additional modifications to training data or increasing the model size, demonstrating strong versatility.\u003C/span>\u003C/p>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">Although Meta has only provided papers and demonstration videos so far without opening up actual usage of the model, from the existing demo results, VideoJAM has already significantly surpassed other existing models, particularly excelling in motion coherence while also enhancing overall visual quality.\u003C/span>\u003C/p>\u003Ch2 style='box-sizing: border-box;margin: 30px 0px 15px;color: rgba(0, 0, 0, 0.85);font-weight: 500;cursor: pointer;padding: 0px;display: block;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan style=\"box-sizing: border-box;cursor: pointer;font-size: 22px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;text-align: left;font-weight: bold;display: block;\">\u003Cspan leaf=\"\">How the VideoJAM model works\u003C/span>\u003C/span>\u003C/h2>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">The core concept of the VideoJAM model is to inject stronger\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">motion prior knowledge\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">into the video generation model, thereby improving the motion consistency of the generated videos. The model consists of two key stages:\u003C/span>\u003C/p>\u003Csection style=\"text-align: center;\" nodeleaf=\"\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-ratio=\"0.33240740740740743\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"1080\" type=\"block\" data-imgfileid=\"100010674\" style=\"height: auto !important;\" src=\"./assets/17423768845030.6669862694929201.jpeg\">\u003C/section>\u003Cul style='box-sizing: border-box;margin: 8px 0px;cursor: pointer;list-style-type: disc;padding: 0px 0px 0px 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;' class=\"list-paddingleft-1\">\u003Cli style=\"box-sizing: border-box;cursor: pointer;\">\u003Csection style=\"box-sizing: border-box;cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);font-size: 16px;line-height: 1.8em;letter-spacing: 0em;text-align: left;font-weight: normal;\">\u003Cp style=\"box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: 0em;text-align: left;text-indent: 0em;padding: 8px 0px;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Training phase\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">: The model not only learns to predict the pixels of the generated frames but also simultaneously predicts the motion within the frames.\u003C/span>\u003Cspan leaf=\"\">\u003Cbr>\u003C/span>\u003Cspan leaf=\"\">Given an input video (x_1) and its corresponding motion representation (d_1), both are noised and embedded into a unified joint latent space representation via a linear layer ((W_{in+})). Subsequently, the diffusion model processes this joint representation and predicts appearance and motion separately through two linear projection layers ((W_{out+})).\u003C/span>\u003C/p>\u003C/section>\u003C/li>\u003Cli style=\"box-sizing: border-box;cursor: pointer;\">\u003Csection style=\"box-sizing: border-box;cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);font-size: 16px;line-height: 1.8em;letter-spacing: 0em;text-align: left;font-weight: normal;\">\u003Cp style=\"box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: 0em;text-align: left;text-indent: 0em;padding: 8px 0px;\">\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Inference stage\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">: Effectively improves the consistency of video motions.\u003C/span>\u003Cspan leaf=\"\">\u003Cbr>\u003C/span>\u003Cspan leaf=\"\">The model employs a mechanism called \"Inner-Guidance,\" which uses the noisy motion information predicted by the model itself at each step of video generation to guide subsequent predictions, thereby significantly enhancing the coherence of motion in the video.\u003C/span>\u003C/p>\u003C/section>\u003C/li>\u003C/ul>\u003Ch2 style='box-sizing: border-box;margin: 30px 0px 15px;color: rgba(0, 0, 0, 0.85);font-weight: 500;cursor: pointer;padding: 0px;display: block;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan style=\"box-sizing: border-box;cursor: pointer;font-size: 22px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;text-align: left;font-weight: bold;display: block;\">\u003Cspan leaf=\"\">Demonstration of the generation effects of the VideoJAM model\u003C/span>\u003C/span>\u003C/h2>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">Here we showcase the results generated by Meta's latest launch of the\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">VideoJAM-30B\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">model, which produces high-quality video results. The test scenarios all involve complex and highly challenging tasks.\u003C/span>\u003C/p>\u003Csection nodeleaf=\"\">\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423768834490.42920172185011474.mp4\" poster=\"./assets/17423768832820.19471933256345308.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003Ch2 style='box-sizing: border-box;margin: 30px 0px 15px;color: rgba(0, 0, 0, 0.85);font-weight: 500;cursor: pointer;padding: 0px;display: block;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;letter-spacing: normal;orphans: 2;text-align: left;text-indent: 0px;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan style=\"box-sizing: border-box;cursor: pointer;font-size: 22px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;text-align: left;font-weight: bold;display: block;\">\u003Cspan leaf=\"\">Qualitative comparison with leading models: VideoJAM-bench benchmark evaluation\u003C/span>\u003C/span>\u003C/h2>\u003Cp style='box-sizing: border-box;margin: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;text-indent: 0px;padding: 8px 0px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-style: normal;font-variant-ligatures: normal;font-variant-caps: normal;font-weight: 400;orphans: 2;text-transform: none;widows: 2;word-spacing: 0px;-webkit-text-stroke-width: 0px;white-space: normal;background-color: rgb(255, 255, 255);text-decoration-thickness: initial;text-decoration-style: initial;text-decoration-color: initial;'>\u003Cspan leaf=\"\">We also conducted evaluations using the VideoJAM-bench benchmark to compare the VideoJAM model with currently leading proprietary models in the industry, such as\u003C/span>\u003Cstrong style=\"box-sizing: border-box;font-weight: bold;cursor: pointer;color: rgb(0, 0, 0);background: none 0% 0% / auto no-repeat scroll padding-box border-box rgba(0, 0, 0, 0);width: auto;height: auto;margin: 0px;padding: 0px;border-style: none;border-width: 3px;border-color: rgba(0, 0, 0, 0.4);border-radius: 0px;\">\u003Cspan leaf=\"\">Sora, Kling, and Runway Gen3\u003C/span>\u003C/strong>\u003Cspan leaf=\"\">) and the base model (DiT-30B). The test content was selected from representative action generation tasks. The results show that VideoJAM surpasses these existing leading models in terms of motion coherence and overall video quality, demonstrating significant advantages.\u003C/span>\u003C/p>\u003Csection nodeleaf=\"\">\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423768843350.8355394138249816.mp4\" poster=\"./assets/17423768831550.6382536681544146.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003Cp style=\"display: none;\">\u003Cmp-style-type data-value=\"3\">\u003C/mp-style-type>\u003C/p>\u003C/div>",[257,266,273,280,287,295,303,311],{"id":258,"title_md5":259,"publish_date":260,"author_md5":261,"is_original":23,"collection":262,"summary_md5":263,"cover_url":264,"cover_url_1_1":265},143,"a061015f874d6851209efb77e47ca60d","2024-10-26","bc27fa490c4d0d525bac812fc0793534","#Stable Diffusion #AI Image Generator","0e92371c1373ddfcb938490ba79311d4","article_res/cover/0b0ca52a11ce139fe92b50beb171d22b.jpeg","article_res/cover/73dc2ead46926f56c16a187454ccc397.jpeg",{"id":267,"title_md5":268,"publish_date":269,"author_md5":261,"is_original":23,"collection":65,"summary_md5":270,"cover_url":271,"cover_url_1_1":272},384,"7bb7c7fbd03dc5376edcc962db85f175","2023-11-01","b37e0e398524420ea3aaa397c8322fb7","article_res/cover/1b86a197902e7d5047f17893294c3fa6.jpeg","article_res/cover/85019e4faccc040f71c3c3223f707c36.jpeg",{"id":274,"title_md5":275,"publish_date":276,"author_md5":261,"is_original":4,"collection":146,"summary_md5":277,"cover_url":278,"cover_url_1_1":279},382,"eb2aa07fa9811f982022b85efc91d432","2023-11-10","bd57785052e79477d9d2356e01cdfc17","article_res/cover/f3614fc55023f05303030c499936e0f3.jpeg","article_res/cover/63d26a9fb3be7c181f32c9c9f630fd4b.jpeg",{"id":281,"title_md5":282,"publish_date":283,"author_md5":261,"is_original":23,"collection":5,"summary_md5":284,"cover_url":285,"cover_url_1_1":286},197,"a971baff48148b8ed9acc1ca7ab99c18","2024-08-14","0eed6772828808601f389b4bac8f4d0a","article_res/cover/22174743c1501f73c6b18fe0c76eaba5.jpeg","article_res/cover/a4056722ad71dc449138929b900a70fd.jpeg",{"id":288,"title_md5":289,"publish_date":290,"author_md5":261,"is_original":4,"collection":291,"summary_md5":292,"cover_url":293,"cover_url_1_1":294},294,"d93f9a17cce091e8f12c8a15cae4026e","2024-04-24","#Psychology","584cf8d0bc2ca2d6bbaea02a7737fc73","article_res/cover/c902e91e56ee3bc22021b1e0518c6379.jpeg","article_res/cover/cc4341697c8d9a39e5078993fc9fcbce.jpeg",{"id":296,"title_md5":297,"publish_date":298,"author_md5":261,"is_original":23,"collection":299,"summary_md5":300,"cover_url":301,"cover_url_1_1":302},4,"2a89ef823c4c1ea3a34a3dae0ff1419b","2025-03-16","#VLM-R1 #Large Vision-Language Model #GRPO","d3daa0f413010e2b97a2dc97ff1317dc","article_res/cover/262d831f5b3b6f3958d243586327d2f2.jpeg","article_res/cover/0b297b83de0f7b4572aac35ac4f14507.jpeg",{"id":304,"title_md5":305,"publish_date":306,"author_md5":261,"is_original":23,"collection":307,"summary_md5":308,"cover_url":309,"cover_url_1_1":310},224,"6750b90e233de5af6a744a3ec322b2c9","2024-07-09","#AI Avatar","aa36a9e66a4e0603e8124b0c52a669cf","article_res/cover/670fbb56c51f582e4b359a0b5c9eefd0.jpeg","article_res/cover/c9a84424a4c803f0e2565e847e9f36ac.jpeg",{"id":312,"title_md5":313,"publish_date":314,"author_md5":261,"is_original":23,"collection":5,"summary_md5":315,"cover_url":316,"cover_url_1_1":317},190,"f5d37c13a930c6eac1823e73c4af23c6","2024-08-22","35d993172c4eef0c90801f2583813a11","article_res/cover/3ddc7f4e0510f58fadfdf890948fc9cb.jpeg","article_res/cover/3b2a65ef8027ae21863178afee691633.jpeg",{"related":319,"small":359},[320,328,335,343,351],{"id":321,"publish_date":322,"is_original":23,"collection":323,"cover_url":324,"cover_url_1_1":325,"title":326,"summary":327,"author":28},437,"2023-07-03","#AI Video Generator","article_res/cover/563dc38b306379275d420c24750a763d.jpeg","article_res/cover/c5c379fa097d64acce93795d447b73fb.jpeg","RERENDER A VIDEO - Maintain temporal coherence of global styles and local textures in videos.","This paper proposes a novel zero-shot text-guided video-to-video translation framework to adapt image models to videos.",{"id":329,"publish_date":330,"is_original":23,"collection":307,"cover_url":331,"cover_url_1_1":332,"title":333,"summary":334,"author":28},213,"2024-07-22","article_res/cover/2d2bca4edde06eec3d7208e1d73355ca.jpeg","article_res/cover/453434f4f2d1f4ef2fe91456155aba6c.jpeg","SMooDi - AI Generates Realistic and Stylized Human Motions","Today I read a paper titled SMooDi: Stylized Motion Diffusion Model, which is currently still",{"id":336,"publish_date":337,"is_original":23,"collection":338,"cover_url":339,"cover_url_1_1":340,"title":341,"summary":342,"author":28},11,"2025-03-10","#AI Agents #AI Coder #Operator #OpenAI #Replit","article_res/cover/7158ed3b6308013fdcd6191afd7b33ee.jpeg","article_res/cover/7a04464dca913878218970fa718957a5.jpeg","AI helps AI: OpenAI's Operator collaborates with Replit's Agent to successfully develop an application.","Just paired openai operator with replit agent to build an app",{"id":344,"publish_date":345,"is_original":23,"collection":346,"cover_url":347,"cover_url_1_1":348,"title":349,"summary":350,"author":28},264,"2024-05-26","#Google #DeepMind #Voe #Imagen3","article_res/cover/cec3437d945bc384856e90bb19de83f4.jpeg","article_res/cover/3b1e6608a6260acbe0d48507dea22e26.jpeg","What topics related to generative AI were covered at Google I/O 2024","Google's mission to organize the world's information and make it universally accessible and useful",{"id":352,"publish_date":353,"is_original":4,"collection":291,"cover_url":354,"cover_url_1_1":355,"title":356,"summary":357,"author":358},587,"2022-03-30","article_res/cover/159eaa627dae6a3aa2f0138aaa3079e9.jpeg","article_res/cover/3247d6e9cc489a24c763f0b261a4089a.jpeg","From the \"Two Whatevers\" to seeking truth from facts","An error doesn't become a mistake until you refuse to correct it.","Psychological counseling records",[360,366,372],{"title":10,"list":361},[362,363,364,365],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":367},[368,369,370,371],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},{"title":242,"list":373},[],[8,9,10],[8,12,13,14,9,10,15,16,17,18],["Reactive",245],1754646408333]