[{"data":1,"prerenderedAt":374},["ShallowReactive",2],{"$fgukOamtKU1RtUiMFsqdObttmqPPQz0uc7bl_gj_LyX0":3,"$fh6h5XDmhh4InrXYjMRmMjx-VlR5-COMF4u0Pl4GxxG4":245,"article-320":373},{"code":4,"msg":5,"data":6},0,"",{"category":7,"tag":11,"popular":19,"latest":86,"banner":126,"list":151,"cache":244},[8,9,10],"Agent","OpenAI","LLM",[8,12,13,14,9,10,15,16,17,18],"Google","Nvidia","Claude","DeepSeek","OCR","Chat","Generator",[20,29,37,45,54,62,70,79],{"id":21,"publish_date":22,"is_original":23,"collection":5,"cover_url":24,"cover_url_1_1":25,"title":26,"summary":27,"author":28},411,"2023-09-10",1,"article_res/cover/451ef50c225a8dc61c4336506794d13b.jpeg","article_res/cover/3ba9dc7a72f87d40b20fc2d225289ee3.jpeg","Idealism","Reality is created by the mind, we can change our reality by changing our mind. - Plato","Renee's Entrepreneurial Journey",{"id":30,"publish_date":31,"is_original":23,"collection":32,"cover_url":33,"cover_url_1_1":34,"title":35,"summary":36,"author":28},108,"2024-12-07","#LLM #AGI #AI Agent","article_res/cover/0039044422e4ec9f61c18e8ee1693bb0.jpeg","article_res/cover/4220971b108a91d21407d87bb02fbaa6.jpeg","Freysa.ai: The World's First Adversarial AI Agent Game","说服 Freysa 把钱包里的钱都拿出来",{"id":38,"publish_date":39,"is_original":23,"collection":40,"cover_url":41,"cover_url_1_1":42,"title":43,"summary":44,"author":28},12,"2025-03-09","#Oxford #Reasoning #LLM #Tool Use","article_res/cover/d448e9b3617a0b5302e1bd10c438bca9.jpeg","article_res/cover/864a468f9cc4c9317efadb3811909888.jpeg","Agentic Reasoning Framework - Significantly enhance the reasoning ability of LLMs through the integration of external tools using agents","Agentic Reasoning: Reasoning LLMs with Tools for Deep Research",{"id":46,"publish_date":47,"is_original":4,"collection":48,"cover_url":49,"cover_url_1_1":50,"title":51,"summary":52,"author":53},480,"2023-04-14","#Stable Diffusion","article_res/cover/0bdbe7cb1de4a78e54536e5d9afa7ec9.jpeg","article_res/cover/b3d6ffec0608dcfaf18c5a69906d1490.jpeg","【AIGC Learning】Generate Prompts Using Word Graphs - Stable Diffusion Web UI Series 13","AI will become a powerful tool in education, transforming the way we learn and deliver instruction.  \n- Reid Hoffman","--",{"id":55,"publish_date":56,"is_original":4,"collection":57,"cover_url":58,"cover_url_1_1":59,"title":60,"summary":61,"author":28},413,"2023-09-08","#Neuroscience","article_res/cover/74f8302d78a23d9430f22171eae136b6.jpeg","article_res/cover/87ca08af81bb304746be5261160964c0.jpeg","Can machines be conscious?","Do we have an ethical obligation to not turn off conscious machines? Would turning them off be murder? No. I don't lose any sleep over unplugging a conscious machine.\n- Jeff Hawkins, \"A Thousand Brains\"",{"id":63,"publish_date":64,"is_original":23,"collection":65,"cover_url":66,"cover_url_1_1":67,"title":68,"summary":69,"author":28},178,"2024-09-09","#Entrepreneurship","article_res/cover/a7224f025b55d1820408085faef63079.jpeg","article_res/cover/11a9995b096cbf64465ef01b8673b154.jpeg","37signals company","This damn sense of relaxation",{"id":71,"publish_date":72,"is_original":4,"collection":73,"cover_url":74,"cover_url_1_1":75,"title":76,"summary":77,"author":78},460,"2023-05-12","#Google","article_res/cover/b970687b12faa52da976f91248c2aa7b.jpeg","article_res/cover/d1e71b52cfd2c63bc6e71f3e85ff135c.jpeg","Learn what BRC-20 and Ordinals are using Google Bard","Ordinals - a new protocol that allows users to store arbitrary data on the Bitcoin blockchain","Google Bard mainly writes",{"id":80,"publish_date":81,"is_original":23,"collection":5,"cover_url":82,"cover_url_1_1":83,"title":84,"summary":85,"author":28},309,"2024-03-26","article_res/cover/9877f95894ee88532d0e6012c23a2df3.jpeg","article_res/cover/20092164ddc109ce6ae56b1984246751.jpeg","Learning the Cancun Upgrade with lepton and perplexity","Building a quick conversation-based search demo with Lepton AI.",[87,95,103,111,119],{"id":88,"publish_date":89,"is_original":23,"collection":90,"cover_url":91,"cover_url_1_1":92,"title":93,"summary":94,"author":28},627,"2025-03-20","#AI Avatar #AI Video Generation","article_res/cover/d95481358f73924989f8c4ee9c75d1c8.jpeg","article_res/cover/b74bc0fab01f8b6a6aa87696c0c3ed8b.jpeg","DisPose: Generating Animated Videos by Driving Video with Reference Images","DisPose is a controllable human image animation method that enhances video generation.",{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},626,"2025-03-21","#Deep Dive into LLMs #LLM #RL #Andrej Karpathy #AlphaGo","article_res/cover/446553a5c8f8f2f07d97b20eaee84e56.jpeg","article_res/cover/e6c2823409c9b34624064b9acbaca6f1.jpeg","AlphaGo and the Power of Reinforcement Learning - Andrej Karpathy's Deep Dive on LLMs (Part 9)","Simply learning from humans will never surpass human capabilities.",{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},625,"2025-03-22","#Deep Dive into LLMs #LLM #RL #RLHF #Andrej Karpathy","article_res/cover/8da81d38b1e5cf558a164710fd8a5389.jpeg","article_res/cover/96f028d76c362a99a0dd56389e8f7a9b.jpeg","Reinforcement Learning from Human Feedback (RLHF) - Andrej Karpathy's Deep Dive on LLMs (Part 10)","Fine-Tuning Language Models from Human Preferences",{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},624,"2025-03-23","#Deep Dive into LLMs #LLM #Andrej Karpathy #AI Agent #MMM","article_res/cover/a5e7c3d48bb09109684d6513287c661d.jpeg","article_res/cover/d3f22b7c0ab8d82fd2da457a299e0773.jpeg","The Future of Large Language Models - Andrej Karpathy's In-Depth Explanation of LLM (Part 11)","preview of things to come",{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},623,"#Google #Voe #AI Video Generation","article_res/cover/c44062fea0f336c2b96b3928292392c2.jpeg","article_res/cover/a041041c69092ad3db191c5bf3ff981b.jpeg","Trial of Google's video generation model VOE2","Our state-of-the-art video generation model",[127,135,143],{"id":128,"publish_date":129,"is_original":23,"collection":130,"cover_url":131,"cover_url_1_1":132,"title":133,"summary":134,"author":28},300,"2024-04-16","#AI in Science #AGI","article_res/cover/6bf01e793e0f33e848572412eebdf9b0.jpeg","article_res/cover/91a5ee21dafecb914fabeb9430d46ec1.jpeg","Would Einstein lose his job - AI and Quantum Computing: A Glimpse into the Near Future","So Einstein's job is still safe.",{"id":136,"publish_date":137,"is_original":23,"collection":138,"cover_url":139,"cover_url_1_1":140,"title":141,"summary":142,"author":28},101,"2024-12-14","#Nvidia #AI 3D Generator","article_res/cover/693e07c85980c5c0c8fde3f037733f23.jpeg","article_res/cover/9ea8edff2d5d303ff3fffff3f6f9c3d9.jpeg","NVIDIA's open-source 3D project LLaMA-Mesh","LLaMA-Mesh: Unifying 3D Mesh Generation with Language Models",{"id":144,"publish_date":145,"is_original":23,"collection":146,"cover_url":147,"cover_url_1_1":148,"title":149,"summary":150,"author":28},131,"2024-11-10","#OpenAI","article_res/cover/87f8ed353ce39f31960e7cdfaf075a35.jpeg","article_res/cover/f597a63935f5cd32e484b4aadd6019e8.jpeg","ChatGPT has launched the Search function","Get fast, timely answers with links to relevant web sources.",{"big":152,"small":214},[153,181],{"title":154,"list":155},"AGENT",[156,157,165,173],{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":158,"publish_date":159,"is_original":23,"collection":160,"cover_url":161,"cover_url_1_1":162,"title":163,"summary":164,"author":28},622,"2025-03-24","#OWL #AI Agent #MAS #MCP #CUA","article_res/cover/cb50ca7f2bf4d1ed50202d7406e1c19a.jpeg","article_res/cover/4aa7aa3badfacf3cc84121334f1050dd.jpeg","OWL: Multi-agent collaboration","OWL: Optimized Workforce Learning for General Multi-Agent Assistance in Real-World Task Automation",{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},620,"2025-03-26","#LLM #Google #Gemini #AI Agent","article_res/cover/53751a6dbbe990b1eb0b63f3b062aed4.jpeg","article_res/cover/031344981f0a212ff82d1f3a64aa5756.jpeg","Gemini 2.5 Pro, claimed to be far ahead of the competition, has been released with great fanfare: comprehensively surpassing other LLMs and topping the global rankings","Gemini 2.5: Our most intelligent AI model",{"id":174,"publish_date":175,"is_original":23,"collection":176,"cover_url":177,"cover_url_1_1":178,"title":179,"summary":180,"author":28},616,"2025-03-29","#MAS #AI Agent #AI Coder #MetaGPT #MGX","article_res/cover/9dcd702ad2035902e5e77967c34a1f1e.jpeg","article_res/cover/0a97fc4a922753c8f46ff38792020df8.jpeg","MGX - An automated website-building platform composed of multiple AI Agents","Your 24/7 AI Team | Dream, Chat, Create.",{"title":182,"list":183},"OPENAI",[184,191,199,206],{"id":185,"publish_date":167,"is_original":23,"collection":186,"cover_url":187,"cover_url_1_1":188,"title":189,"summary":190,"author":28},619,"#OpenAI #AI Image Generator #4o #MMM #AR Transformer","article_res/cover/2faffc97fcecf3151552cb0fd3206d89.jpeg","article_res/cover/1133cb4948af44cee2e7fbe79efb69e5.jpeg","The native image function of GPT-4o is officially launched","Introducing 4o Image Generation",{"id":192,"publish_date":193,"is_original":4,"collection":194,"cover_url":195,"cover_url_1_1":196,"title":197,"summary":198,"author":28},434,"2023-07-15","#Anthropic #OpenAI #Google #AI Code Generator #Claude","article_res/cover/e1b6f600a2b9f262a4392684e5f2ce25.jpeg","article_res/cover/6e1772e83f78f9a351ab23d3e414adee.jpeg","Latest Updates on Google Bard /Anthropic Claude2 / ChatGPT Code Interpreter","We want our models to use their programming skills to provide more natural interfaces to the basic functions of our computers.  \n - OpenAI",{"id":200,"publish_date":201,"is_original":4,"collection":146,"cover_url":202,"cover_url_1_1":203,"title":204,"summary":205,"author":28},417,"2023-08-24","article_res/cover/bccf897d50a88b18364e35f7466387e0.jpeg","article_res/cover/2f871085c1073717c1703ae86e18056f.jpeg","The GPT-3.5 Turbo fine-tuning (fine-tuning function) has been released～","Developers can now bring their own data to customize GPT-3.5 Turbo for their use cases.",{"id":207,"publish_date":208,"is_original":4,"collection":209,"cover_url":210,"cover_url_1_1":211,"title":212,"summary":213,"author":28},407,"2023-09-22","#OpenAI #AI Image Generator","article_res/cover/c59005e903d35cfc32346e2756e2728a.jpeg","article_res/cover/ba011d265e6d84b5c8cb6fd6b757b6cc.jpeg","Dall-E 3","DALL·E 3 understands significantly more nuance and detail, allowing you to easily translate your ideas into images.",[215,221,241],{"title":10,"list":216},[217,218,219,220],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":223},"GOOGLE",[224,225,226,234],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},615,"2025-03-30","#AI Researcher #AI Science #HKU #Google #AI Agent","article_res/cover/21fadf906067714bb0db31ae13a77c15.jpeg","article_res/cover/2697999a72bd26b22e85f0e92936d3ed.jpeg","AI-Researcher: LLM-driven全自动 scientific research assistant","AI-Researcher: Fully-Automated Scientific Discovery with LLM Agents  \nOpen-Sourced Alternative to Google AI Co-Scientist",{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},463,"2023-05-09","article_res/cover/89800f207723acdb55fc53bf999ebdc9.jpeg","article_res/cover/5764f369b4accd8f83e94aa4c077a175.jpeg","The Smallville sandbox world - A town with 25 virtual residents","Believable proxies of human behavior can empower interactive apps: Immersive environment, Rehearsal space, Prototyping tool",{"title":242,"list":243},"NVIDIA",[],true,{"code":4,"msg":5,"data":246},{"id":247,"publish_date":248,"is_original":23,"collection":249,"articles_id":250,"cover_url":251,"cover_url_1_1":252,"title":253,"summary":254,"author":28,"content":255,"popular":256,"list":317,"category":371,"tag":372},320,"2024-02-29","#Google #AI Game","cg-dUUpsvSUbb5d2mg1u3Q","article_res/cover/19ab8d13e88c221abdb72ad8f88eda90.jpeg","article_res/cover/0c002aab590ab91f19503d06f320afe4.jpeg","Google's newly launched Genie model — AI-generated games","We introduce Genie, a foundation world model trained exclusively from Internet videos that can generate 2D worlds.","\u003Cdiv class=\"rich_media_content js_underline_content\n                       autoTypeSetting24psection\n            \" id=\"js_content\">\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>Last week, Google published a paper titled \"Genie: Generative Interactive Environments,\" which represents the first generative interactive environment trained in an unsupervised manner from unlabelled internet videos. The model can generate various controllable virtual worlds based on descriptions such as text, synthetic images, photographs, or even sketches.\u003C/p>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423812878610.5065559129985544.mp4\" poster=\"./assets/17423812878460.36212023004427296.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>With 11 billion parameters, Genie can be considered a foundational world model. It consists of a spatiotemporal video tokenizer, an autoregressive dynamics model, and a simple yet scalable latent action model. Despite no real action labels or other domain-specific requirements commonly found in world model literature being used during training, Genie allows users to manipulate the generated environments frame by frame. Moreover, the learned latent action space enables agents to imitate behaviors from videos they have never seen before, paving the way for future training of general-purpose agents.\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>\u003Cbr>\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100003756\" data-ratio=\"0.4221105527638191\" data-s=\"300,640\" data-type=\"png\" data-w=\"995\" style=\"\" src=\"./assets/17423812891520.304275372010393.png\">\u003C/p>\u003Ch3 data-tool=\"mdnice编辑器\" style='margin-top: 30px;margin-bottom: 15px;font-weight: bold;font-size: 20px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;letter-spacing: normal;text-align: left;text-wrap: wrap;'>Introduction\u003C/h3>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>Genie utilized a dataset containing over 200,000 hours of two-dimensional platform game videos and trained an 11-billion-parameter world model. Through unsupervised learning, Genie mastered a variety of latent actions that can consistently control characters.\u003C/p>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423812878500.7959438766894857.mp4\" poster=\"./assets/17423812878500.6937436007254605.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>This model can transform any image into a playable 2D world. For example, Genie can bring human-designed creations to life, such as the beautiful artwork from Seneca and Caspian, two of the youngest world creators in history.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100003757\" data-ratio=\"0.8985294117647059\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"680\" style=\"\" src=\"./assets/17423812891950.3091790334136866.jpeg\">\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>The latent action space learned by Genie is not only diverse and consistent, but also interpretable. Usually, after a few attempts, humans can understand its mapping to semantically meaningful actions (such as walking left, walking right, jumping, etc.).\u003C/p>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423812878480.3913255616127338.mp4\" poster=\"./assets/17423812878470.515716196176857.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003Cspan style='text-align: center;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;'>\u003C/span>\u003C/section>\u003Ch3 data-tool=\"mdnice编辑器\" style='margin-top: 30px;margin-bottom: 15px;font-weight: bold;font-size: 20px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;letter-spacing: normal;text-align: left;text-wrap: wrap;'>Technology\u003C/h3>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>Genie uses a combination of spatiotemporal video tokenizers, autoregressive dynamics models, and latent action models to generate controllable video environments. It is trained using only video data without requiring action labels, inferring latent actions between frames through unsupervised learning, thereby enabling frame-by-frame control of the generated video sequences. To alleviate the quadratic memory cost issue brought by Vision Transformers for video processing, Genie employs a memory-efficient ST-transformer in all components. The model consists of three parts: the video tokenizer, the latent action model, and the dynamics model, as shown below:\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100003761\" data-ratio=\"0.2529411764705882\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"680\" style=\"\" src=\"./assets/17423812892000.12808729802955532.jpeg\">\u003C/p>\u003Col data-tool=\"mdnice编辑器\" class=\"list-paddingleft-1\" style='margin-top: 8px;margin-bottom: 8px;padding-left: 25px;width: 557.438px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;'>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">: Analyzes video frames and converts them into a series of representative tokens, capturing spatiotemporal information in the video.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100003763\" data-ratio=\"0.3719626168224299\" data-s=\"300,640\" data-type=\"png\" data-w=\"535\" style=\"\" src=\"./assets/17423812890280.7043099563461512.png\">\u003C/p>\u003Cp>\u003Cbr>\u003C/p>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">: Learning and inferring actions and changes between different frames, which are not explicitly labeled in the training data.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100003762\" data-ratio=\"0.4307116104868914\" data-s=\"300,640\" data-type=\"png\" data-w=\"534\" style=\"\" src=\"./assets/17423812890260.15735995798611846.png\">\u003C/p>\u003Cp>\u003Cbr>\u003C/p>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">: Predicting the next video frame based on the current frame and inferred latent actions, generating the next frame in the video sequence.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100003764\" data-ratio=\"0.6472727272727272\" data-s=\"300,640\" data-type=\"png\" data-w=\"550\" style=\"\" src=\"./assets/17423812904560.008153079266303598.png\">\u003C/p>\u003Cp>\u003Cbr>\u003C/p>\u003C/section>\u003C/li>\u003C/ol>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>This structural design enables Genie to learn how to control and generate dynamic environments through videos themselves without relying on external annotations, providing a strong foundation for creating complex video simulations and interactive experiences.\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>The key to training the model lies in data and computing power. The team trained a classifier specifically for screening high-quality parts of the video dataset and conducted experiments on an extended scale. The experimental results showed that as the number of model parameters and batch size increased, the performance of the model gradually improved. The final developed model contained 11 billion parameters.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100003768\" data-ratio=\"0.3352941176470588\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"680\" style=\"\" src=\"./assets/17423812900550.5024465167587719.jpeg\">\u003C/p>\u003Ch3 data-tool=\"mdnice编辑器\" style='margin-top: 30px;margin-bottom: 15px;font-weight: bold;font-size: 20px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;letter-spacing: normal;text-align: left;text-wrap: wrap;'>Results\u003C/h3>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>Playback from image prompts: Images generated by text-to-image models, hand-drawn sketches, or real-world photos can be used to prompt Genie. In each case, the prompt frame and the second frame after taking four consecutive latent actions are shown here. In each case, significant character movement is visible, even though some images are visually distinct from the dataset.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100003765\" data-ratio=\"0.6212962962962963\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"./assets/17423812913460.042821709183215484.png\">\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>Genie's model has broad applicability and is not limited to two dimensions. The team also trained Genie on robotic data (RT-1). Although these data do not contain action labels, it demonstrates its ability to learn a controllable action simulation environment based on this data. This represents a promising step towards developing a general world model for artificial general intelligence (AGI).\u003C/p>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423812886750.5880964578717476.mp4\" poster=\"./assets/17423812888600.7437418295395328.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>The Genie model currently runs at 1 FPS (frames per second), which indeed means it is still far from real-time playability. However, although 1 FPS is far from sufficient for real-time interaction or gaming experience, considering the complexity and depth of the model's processing, this is already a fairly impressive achievement.\u003C/p>\u003Cp style=\"display: none;\">\u003Cmp-style-type data-value=\"3\">\u003C/mp-style-type>\u003C/p>\u003C/div>",[257,261,269,277,285,293,301,309],{"id":88,"title_md5":258,"publish_date":89,"author_md5":259,"is_original":23,"collection":90,"summary_md5":260,"cover_url":91,"cover_url_1_1":92},"9ce7808099172341f21d7ef1d1c29bdf","bc27fa490c4d0d525bac812fc0793534","0ae085102037106ef8f0e349904cfb4f",{"id":262,"title_md5":263,"publish_date":264,"author_md5":259,"is_original":23,"collection":265,"summary_md5":266,"cover_url":267,"cover_url_1_1":268},94,"ffa2e96c575160abcd0981661eecf829","2024-12-21","#Google #o1 #LLM #Gemini","04264bc4564cbf0b145278708eb687e6","article_res/cover/0978cc05f28f10330ab5289902cf6003.jpeg","article_res/cover/e13e8d924455d80dda7fa1120a61b2d8.jpeg",{"id":270,"title_md5":271,"publish_date":272,"author_md5":273,"is_original":4,"collection":5,"summary_md5":274,"cover_url":275,"cover_url_1_1":276},567,"2c4ece4a9a7dcc20b75e086a52cd3ed1","2022-04-19","8b3607d0f4181a3cb6ffdccf7185f09b","91c2b7aa99eccbd47dbae562097bf515","article_res/cover/8c0e9a3a98bbcb1e7dc7bd1d82581ad2.jpeg","article_res/cover/f30fe693548f114d1b73677f1511e5e8.jpeg",{"id":278,"title_md5":279,"publish_date":280,"author_md5":281,"is_original":4,"collection":5,"summary_md5":282,"cover_url":283,"cover_url_1_1":284},511,"dbf613a01d27e44104c9678e8b651264","2022-06-27","455a80659fd561aa8036c9b004c01ce0","3185b89aae0a8a83b41fd9d5db524652","article_res/cover/537c2ae9509ef87248181438153ccb20.jpeg","article_res/cover/79bf69575fbef66659d96e24ac8ac60f.jpeg",{"id":286,"title_md5":287,"publish_date":288,"author_md5":259,"is_original":23,"collection":289,"summary_md5":290,"cover_url":291,"cover_url_1_1":292},69,"390d7a3f922edf7263ec7ab070f4558a","2025-01-16","#OpenAI #LLM","653e13f68b38b0c66c0a5b3dbe4e59ef","article_res/cover/0ca2ea75ea4712a795992dc4232a9b0d.jpeg","article_res/cover/1c3c4a37beeeb80a3c288c1a168ad2e7.jpeg",{"id":294,"title_md5":295,"publish_date":296,"author_md5":259,"is_original":4,"collection":297,"summary_md5":298,"cover_url":299,"cover_url_1_1":300},336,"563987e7e51bcddde6bfe9979431090e","2024-01-25","#AI Video Generator #Google","ac39026990ffbe392ae4743e917d1da3","article_res/cover/736f3c32d86149246a8e221c6dd556fa.jpeg","article_res/cover/2d8d4843a666a921ac843a1fef0f5ec6.jpeg",{"id":302,"title_md5":303,"publish_date":304,"author_md5":259,"is_original":23,"collection":305,"summary_md5":306,"cover_url":307,"cover_url_1_1":308},148,"655c7956d710861fe36cb46bbb57b80b","2024-10-18","#State of AI Report 2024 #Neuroscience","da8c85b3528b4639b4668bfa614675af","article_res/cover/84839886d5ec15aa60dc248e4b93e8da.jpeg","article_res/cover/0d2e7d3ac32fd802ee41dbb5d4815703.jpeg",{"id":310,"title_md5":311,"publish_date":312,"author_md5":259,"is_original":23,"collection":313,"summary_md5":314,"cover_url":315,"cover_url_1_1":316},204,"ba7b361271f1a69900271aab0a596924","2024-08-05","#Flux","aa5b84048379285f9fe371226506717c","article_res/cover/35c36e3816ea77c05920c42e4c5134f0.jpeg","article_res/cover/c0a7db5e56efe88b6f86914ed3f2b679.jpeg",{"related":318,"small":356},[319,327,334,341,348],{"id":320,"publish_date":321,"is_original":23,"collection":322,"cover_url":323,"cover_url_1_1":324,"title":325,"summary":326,"author":28},150,"2024-10-16","#State of AI Report 2024","article_res/cover/86997d1ea44330bdef40d7bc8ca7969e.jpeg","article_res/cover/26dfcdc6dd810972072ff70f60e37d70.jpeg","\"State of AI Report 2024\" - The current state of AI and its future impact","The State of AI Report analyzes the most interesting developments in AI.",{"id":328,"publish_date":329,"is_original":4,"collection":5,"cover_url":330,"cover_url_1_1":331,"title":332,"summary":333,"author":28},355,"2024-01-01","article_res/cover/ea35483b21a73e6ac9a6645f7c948496.jpeg","article_res/cover/5282d6ef1a096870b451aa4443c0fffb.jpeg","\"CRYPTO THESES 2024\" on Crypto Monies","Bitcoin truly is digital gold, transmuted from physical energy sources and a p2p global competition for computing power.",{"id":335,"publish_date":336,"is_original":23,"collection":73,"cover_url":337,"cover_url_1_1":338,"title":339,"summary":340,"author":28},198,"2024-08-13","article_res/cover/873ffb7461c10e9f4d5e76ea3e1a8671.jpeg","article_res/cover/88e34eef77d060df18230b3307b1183d.jpeg","How Google DeepMind scientist Nicholas Carlini uses AI (Part 2) - Specific Examples","Evaluate what LLMs can do, not what they can't",{"id":342,"publish_date":343,"is_original":23,"collection":322,"cover_url":344,"cover_url_1_1":345,"title":346,"summary":347,"author":28},149,"2024-10-17","article_res/cover/3b8743ca9c648e09b59e160b16c4b196.jpeg","article_res/cover/4773993643e3fc5a8b782c4d0761636d.jpeg","\"State of AI Report 2024\" (1) - AlphaGeometry, Synthetic Data, RAG","Foundation models demonstrate their ability to break out of language as multimodal research.",{"id":349,"publish_date":350,"is_original":23,"collection":351,"cover_url":352,"cover_url_1_1":353,"title":354,"summary":355,"author":28},75,"2025-01-08","#Lip Sync #AI Avatar #Bytedance","article_res/cover/1120d78d62c302df54f1609a6e7b2f04.jpeg","article_res/cover/6e8cd9f20df121b0ae51852ee41ab347.jpeg","Byte's latest open-source lip-sync video generation: LatentSync","Taming Stable Diffusion for Lip Sync!",[357,363,369],{"title":10,"list":358},[359,360,361,362],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":364},[365,366,367,368],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},{"title":242,"list":370},[],[8,9,10],[8,12,13,14,9,10,15,16,17,18],["Reactive",245],1754646416160]