[{"data":1,"prerenderedAt":380},["ShallowReactive",2],{"$fgukOamtKU1RtUiMFsqdObttmqPPQz0uc7bl_gj_LyX0":3,"$faVifjvf513dh2aSg8Q4FS3gowyxu2wu9HRzTmdg3a2Y":245,"article-208":379},{"code":4,"msg":5,"data":6},0,"",{"category":7,"tag":11,"popular":19,"latest":86,"banner":126,"list":151,"cache":244},[8,9,10],"Agent","OpenAI","LLM",[8,12,13,14,9,10,15,16,17,18],"Google","Nvidia","Claude","DeepSeek","OCR","Chat","Generator",[20,29,37,45,54,62,70,79],{"id":21,"publish_date":22,"is_original":23,"collection":5,"cover_url":24,"cover_url_1_1":25,"title":26,"summary":27,"author":28},411,"2023-09-10",1,"article_res/cover/451ef50c225a8dc61c4336506794d13b.jpeg","article_res/cover/3ba9dc7a72f87d40b20fc2d225289ee3.jpeg","Idealism","Reality is created by the mind, we can change our reality by changing our mind. - Plato","Renee's Entrepreneurial Journey",{"id":30,"publish_date":31,"is_original":23,"collection":32,"cover_url":33,"cover_url_1_1":34,"title":35,"summary":36,"author":28},108,"2024-12-07","#LLM #AGI #AI Agent","article_res/cover/0039044422e4ec9f61c18e8ee1693bb0.jpeg","article_res/cover/4220971b108a91d21407d87bb02fbaa6.jpeg","Freysa.ai: The World's First Adversarial AI Agent Game","说服 Freysa 把钱包里的钱都拿出来",{"id":38,"publish_date":39,"is_original":23,"collection":40,"cover_url":41,"cover_url_1_1":42,"title":43,"summary":44,"author":28},12,"2025-03-09","#Oxford #Reasoning #LLM #Tool Use","article_res/cover/d448e9b3617a0b5302e1bd10c438bca9.jpeg","article_res/cover/864a468f9cc4c9317efadb3811909888.jpeg","Agentic Reasoning Framework - Significantly enhance the reasoning ability of LLMs through the integration of external tools using agents","Agentic Reasoning: Reasoning LLMs with Tools for Deep Research",{"id":46,"publish_date":47,"is_original":4,"collection":48,"cover_url":49,"cover_url_1_1":50,"title":51,"summary":52,"author":53},480,"2023-04-14","#Stable Diffusion","article_res/cover/0bdbe7cb1de4a78e54536e5d9afa7ec9.jpeg","article_res/cover/b3d6ffec0608dcfaf18c5a69906d1490.jpeg","【AIGC Learning】Generate Prompts Using Word Graphs - Stable Diffusion Web UI Series 13","AI will become a powerful tool in education, transforming the way we learn and deliver instruction.  \n- Reid Hoffman","--",{"id":55,"publish_date":56,"is_original":4,"collection":57,"cover_url":58,"cover_url_1_1":59,"title":60,"summary":61,"author":28},413,"2023-09-08","#Neuroscience","article_res/cover/74f8302d78a23d9430f22171eae136b6.jpeg","article_res/cover/87ca08af81bb304746be5261160964c0.jpeg","Can machines be conscious?","Do we have an ethical obligation to not turn off conscious machines? Would turning them off be murder? No. I don't lose any sleep over unplugging a conscious machine.\n- Jeff Hawkins, \"A Thousand Brains\"",{"id":63,"publish_date":64,"is_original":23,"collection":65,"cover_url":66,"cover_url_1_1":67,"title":68,"summary":69,"author":28},178,"2024-09-09","#Entrepreneurship","article_res/cover/a7224f025b55d1820408085faef63079.jpeg","article_res/cover/11a9995b096cbf64465ef01b8673b154.jpeg","37signals company","This damn sense of relaxation",{"id":71,"publish_date":72,"is_original":4,"collection":73,"cover_url":74,"cover_url_1_1":75,"title":76,"summary":77,"author":78},460,"2023-05-12","#Google","article_res/cover/b970687b12faa52da976f91248c2aa7b.jpeg","article_res/cover/d1e71b52cfd2c63bc6e71f3e85ff135c.jpeg","Learn what BRC-20 and Ordinals are using Google Bard","Ordinals - a new protocol that allows users to store arbitrary data on the Bitcoin blockchain","Google Bard mainly writes",{"id":80,"publish_date":81,"is_original":23,"collection":5,"cover_url":82,"cover_url_1_1":83,"title":84,"summary":85,"author":28},309,"2024-03-26","article_res/cover/9877f95894ee88532d0e6012c23a2df3.jpeg","article_res/cover/20092164ddc109ce6ae56b1984246751.jpeg","Learning the Cancun Upgrade with lepton and perplexity","Building a quick conversation-based search demo with Lepton AI.",[87,95,103,111,119],{"id":88,"publish_date":89,"is_original":23,"collection":90,"cover_url":91,"cover_url_1_1":92,"title":93,"summary":94,"author":28},627,"2025-03-20","#AI Avatar #AI Video Generation","article_res/cover/d95481358f73924989f8c4ee9c75d1c8.jpeg","article_res/cover/b74bc0fab01f8b6a6aa87696c0c3ed8b.jpeg","DisPose: Generating Animated Videos by Driving Video with Reference Images","DisPose is a controllable human image animation method that enhances video generation.",{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},626,"2025-03-21","#Deep Dive into LLMs #LLM #RL #Andrej Karpathy #AlphaGo","article_res/cover/446553a5c8f8f2f07d97b20eaee84e56.jpeg","article_res/cover/e6c2823409c9b34624064b9acbaca6f1.jpeg","AlphaGo and the Power of Reinforcement Learning - Andrej Karpathy's Deep Dive on LLMs (Part 9)","Simply learning from humans will never surpass human capabilities.",{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},625,"2025-03-22","#Deep Dive into LLMs #LLM #RL #RLHF #Andrej Karpathy","article_res/cover/8da81d38b1e5cf558a164710fd8a5389.jpeg","article_res/cover/96f028d76c362a99a0dd56389e8f7a9b.jpeg","Reinforcement Learning from Human Feedback (RLHF) - Andrej Karpathy's Deep Dive on LLMs (Part 10)","Fine-Tuning Language Models from Human Preferences",{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},624,"2025-03-23","#Deep Dive into LLMs #LLM #Andrej Karpathy #AI Agent #MMM","article_res/cover/a5e7c3d48bb09109684d6513287c661d.jpeg","article_res/cover/d3f22b7c0ab8d82fd2da457a299e0773.jpeg","The Future of Large Language Models - Andrej Karpathy's In-Depth Explanation of LLM (Part 11)","preview of things to come",{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},623,"#Google #Voe #AI Video Generation","article_res/cover/c44062fea0f336c2b96b3928292392c2.jpeg","article_res/cover/a041041c69092ad3db191c5bf3ff981b.jpeg","Trial of Google's video generation model VOE2","Our state-of-the-art video generation model",[127,135,143],{"id":128,"publish_date":129,"is_original":23,"collection":130,"cover_url":131,"cover_url_1_1":132,"title":133,"summary":134,"author":28},300,"2024-04-16","#AI in Science #AGI","article_res/cover/6bf01e793e0f33e848572412eebdf9b0.jpeg","article_res/cover/91a5ee21dafecb914fabeb9430d46ec1.jpeg","Would Einstein lose his job - AI and Quantum Computing: A Glimpse into the Near Future","So Einstein's job is still safe.",{"id":136,"publish_date":137,"is_original":23,"collection":138,"cover_url":139,"cover_url_1_1":140,"title":141,"summary":142,"author":28},101,"2024-12-14","#Nvidia #AI 3D Generator","article_res/cover/693e07c85980c5c0c8fde3f037733f23.jpeg","article_res/cover/9ea8edff2d5d303ff3fffff3f6f9c3d9.jpeg","NVIDIA's open-source 3D project LLaMA-Mesh","LLaMA-Mesh: Unifying 3D Mesh Generation with Language Models",{"id":144,"publish_date":145,"is_original":23,"collection":146,"cover_url":147,"cover_url_1_1":148,"title":149,"summary":150,"author":28},131,"2024-11-10","#OpenAI","article_res/cover/87f8ed353ce39f31960e7cdfaf075a35.jpeg","article_res/cover/f597a63935f5cd32e484b4aadd6019e8.jpeg","ChatGPT has launched the Search function","Get fast, timely answers with links to relevant web sources.",{"big":152,"small":214},[153,181],{"title":154,"list":155},"AGENT",[156,157,165,173],{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":158,"publish_date":159,"is_original":23,"collection":160,"cover_url":161,"cover_url_1_1":162,"title":163,"summary":164,"author":28},622,"2025-03-24","#OWL #AI Agent #MAS #MCP #CUA","article_res/cover/cb50ca7f2bf4d1ed50202d7406e1c19a.jpeg","article_res/cover/4aa7aa3badfacf3cc84121334f1050dd.jpeg","OWL: Multi-agent collaboration","OWL: Optimized Workforce Learning for General Multi-Agent Assistance in Real-World Task Automation",{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},620,"2025-03-26","#LLM #Google #Gemini #AI Agent","article_res/cover/53751a6dbbe990b1eb0b63f3b062aed4.jpeg","article_res/cover/031344981f0a212ff82d1f3a64aa5756.jpeg","Gemini 2.5 Pro, claimed to be far ahead of the competition, has been released with great fanfare: comprehensively surpassing other LLMs and topping the global rankings","Gemini 2.5: Our most intelligent AI model",{"id":174,"publish_date":175,"is_original":23,"collection":176,"cover_url":177,"cover_url_1_1":178,"title":179,"summary":180,"author":28},616,"2025-03-29","#MAS #AI Agent #AI Coder #MetaGPT #MGX","article_res/cover/9dcd702ad2035902e5e77967c34a1f1e.jpeg","article_res/cover/0a97fc4a922753c8f46ff38792020df8.jpeg","MGX - An automated website-building platform composed of multiple AI Agents","Your 24/7 AI Team | Dream, Chat, Create.",{"title":182,"list":183},"OPENAI",[184,191,199,206],{"id":185,"publish_date":167,"is_original":23,"collection":186,"cover_url":187,"cover_url_1_1":188,"title":189,"summary":190,"author":28},619,"#OpenAI #AI Image Generator #4o #MMM #AR Transformer","article_res/cover/2faffc97fcecf3151552cb0fd3206d89.jpeg","article_res/cover/1133cb4948af44cee2e7fbe79efb69e5.jpeg","The native image function of GPT-4o is officially launched","Introducing 4o Image Generation",{"id":192,"publish_date":193,"is_original":4,"collection":194,"cover_url":195,"cover_url_1_1":196,"title":197,"summary":198,"author":28},434,"2023-07-15","#Anthropic #OpenAI #Google #AI Code Generator #Claude","article_res/cover/e1b6f600a2b9f262a4392684e5f2ce25.jpeg","article_res/cover/6e1772e83f78f9a351ab23d3e414adee.jpeg","Latest Updates on Google Bard /Anthropic Claude2 / ChatGPT Code Interpreter","We want our models to use their programming skills to provide more natural interfaces to the basic functions of our computers.  \n - OpenAI",{"id":200,"publish_date":201,"is_original":4,"collection":146,"cover_url":202,"cover_url_1_1":203,"title":204,"summary":205,"author":28},417,"2023-08-24","article_res/cover/bccf897d50a88b18364e35f7466387e0.jpeg","article_res/cover/2f871085c1073717c1703ae86e18056f.jpeg","The GPT-3.5 Turbo fine-tuning (fine-tuning function) has been released～","Developers can now bring their own data to customize GPT-3.5 Turbo for their use cases.",{"id":207,"publish_date":208,"is_original":4,"collection":209,"cover_url":210,"cover_url_1_1":211,"title":212,"summary":213,"author":28},407,"2023-09-22","#OpenAI #AI Image Generator","article_res/cover/c59005e903d35cfc32346e2756e2728a.jpeg","article_res/cover/ba011d265e6d84b5c8cb6fd6b757b6cc.jpeg","Dall-E 3","DALL·E 3 understands significantly more nuance and detail, allowing you to easily translate your ideas into images.",[215,221,241],{"title":10,"list":216},[217,218,219,220],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":223},"GOOGLE",[224,225,226,234],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},615,"2025-03-30","#AI Researcher #AI Science #HKU #Google #AI Agent","article_res/cover/21fadf906067714bb0db31ae13a77c15.jpeg","article_res/cover/2697999a72bd26b22e85f0e92936d3ed.jpeg","AI-Researcher: LLM-driven全自动 scientific research assistant","AI-Researcher: Fully-Automated Scientific Discovery with LLM Agents  \nOpen-Sourced Alternative to Google AI Co-Scientist",{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},463,"2023-05-09","article_res/cover/89800f207723acdb55fc53bf999ebdc9.jpeg","article_res/cover/5764f369b4accd8f83e94aa4c077a175.jpeg","The Smallville sandbox world - A town with 25 virtual residents","Believable proxies of human behavior can empower interactive apps: Immersive environment, Rehearsal space, Prototyping tool",{"title":242,"list":243},"NVIDIA",[],true,{"code":4,"msg":5,"data":246},{"id":247,"publish_date":248,"is_original":23,"collection":249,"articles_id":250,"cover_url":251,"cover_url_1_1":252,"title":253,"summary":254,"author":28,"content":255,"popular":256,"list":320,"category":377,"tag":378},208,"2024-07-31","#Meta #Object Detection","H2l60KQEbcCUkkoWzfAPlA","article_res/cover/fd1b90586fee909b10777e47d561f409.jpeg","article_res/cover/d30b0789f086ae58ad87acd922eb7a19.jpeg","Meta's SAM 2 - A unified model for real-time object segmentation","Introducing SAM 2: The next generation of Meta Segment Anything Model for videos and images","\u003Cdiv class=\"rich_media_content js_underline_content\n                       autoTypeSetting24psection\n            \" id=\"js_content\">\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>, which received great feedback from the community. Yesterday, Meta released SAM 2, a unified model for real-time object segmentation that applies to both images and videos, achieving industry-leading performance.\u003C/p>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423803140060.46647975778987494.mp4\" poster=\"./assets/17423803140080.5701575625223112.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>Meta has shared the code and model weights under a permissive Apache 2.0 license. Additionally, Meta also released the SA-V dataset, which includes approximately 51,000 real-world videos and over 600,000 spatial-temporal masks (masklets). (Real open-source!!)\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>SAM 2 can segment any object in any video or image, even objects and visual domains it has never seen before, allowing it to be applied across various scenarios without customization. SAM 2 has many potential practical applications; for example, its output can be combined with generative video models to create new video effects and unlock new creative applications. SAM 2 can also help accelerate annotation tools for visual data, building better computer vision systems.\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>\u003Cspan style='color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 20px;font-weight: 700;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);'>Web preview\u003C/span>\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>The web-based demo of SAM 2 provides a preview function that allows users to segment and track objects in videos and apply effects. https://sam2.metademolab.com/. I tracked a football⚽️ and a watch⌚️, and as the video frames passed, the results were indeed quite good. There is also an open-source model available on Github for setting up your own environment: https://github.com/facebookresearch/segment-anything-2.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100006143\" data-ratio=\"0.5453703703703704\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"./assets/17423803163460.2855824203408208.png\">\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>\u003Cspan style=\"font-size: 20px;font-weight: bold;letter-spacing: 0em;\">Technical framework\u003C/span>\u003Cbr>\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>Segment Anything Model 2 (SAM 2) is a foundational model dedicated to solving prompt-based visual segmentation in images and videos. We extended SAM to video processing by treating images as single-frame videos. The model is designed as a simple transformer architecture with streaming memory to enable real-time video processing.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100006119\" data-ratio=\"0.2518518518518518\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"./assets/17423803163480.6507125096647188.png\">\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>We built a model-loop data engine that improves models and data through user interaction, collecting our SA-V dataset (as shown in the figure below), which is the largest online video segmentation dataset to date. After training on our data, SAM 2 performs well across a wide range of tasks and visual domains.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100006118\" data-ratio=\"0.5407407407407407\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"1080\" style=\"\" src=\"./assets/17423803163490.3890488800065506.jpeg\">\u003C/p>\u003Ch4 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);cursor: pointer;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);'>\u003Cspan style=\"cursor: pointer;font-size: 18px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">Architecture evolution\u003C/span>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423803140350.8349364041950496.mp4\" poster=\"./assets/17423803139820.03227532220892093.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003C/h4>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>In the architectural evolution from SAM to SAM 2, SAM 2 adds an occlusion head to predict whether an object is visible, enabling segmentation even when the object is temporarily occluded.\u003C/p>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423803142410.2534306400911974.mp4\" poster=\"./assets/17423803140030.6750847716815007.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);cursor: pointer;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);'>\u003Cspan style=\"cursor: pointer;font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">Use cases\u003C/span>\u003C/h3>\u003Cp style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;text-wrap: wrap;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>SAM 2 can be directly applied to various practical use cases, such as tracking objects to create video effects (as shown in the left image), or segmenting moving cells in videos captured by microscopes to assist scientific research (as shown in the right image).\u003C/p>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423803161570.7715363112266032.mp4\" poster=\"./assets/17423803140660.867703152082568.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003Cp style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;text-wrap: wrap;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>In the future, SAM 2 could be part of larger AI systems, recognizing everyday items through AR glasses and providing reminders and instructions to users.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100006140\" data-ratio=\"0.25510204081632654\" data-s=\"300,640\" data-type=\"gif\" data-w=\"1078\" style=\"\" src=\"./assets/17423803166450.6111718211832331.gif\">\u003C/p>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);cursor: pointer;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);'>\u003Cspan style=\"cursor: pointer;font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">Comparison\u003C/span>\u003C/h3>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>In the comparison, both models initialized the mask of the T-shirt in the first frame. The baseline model used SAM's mask. In contrast, SAM 2 can accurately track object parts throughout the entire video, while the baseline model over-segments, including the person's head instead of just tracking the T-shirt.\u003C/p>\u003Csection>\u003Cdiv style=\"height: 508px; background: rgb(0, 0, 0); border-radius: 4px; overflow: hidden; margin-bottom: 12px;\">\u003Cvideo src=\"./assets/17423803142540.1132748733485145.mp4\" poster=\"./assets/17423803146540.45822526948393105.jpeg\" controls=\"\" style=\"width: 100%;height: 100%;\">\u003C/video>\u003C/div>\u003C/section>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>SAM 2 (right image) outperforms SAM (left image) in terms of object segmentation accuracy in images.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100006117\" data-ratio=\"0.5175925925925926\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"./assets/17423803164580.5448696818038967.png\">\u003C/p>\u003Cp style=\"display: none;\">\u003Cmp-style-type data-value=\"3\">\u003C/mp-style-type>\u003C/p>\u003C/div>",[257,265,274,281,289,296,304,312],{"id":258,"title_md5":259,"publish_date":260,"author_md5":261,"is_original":4,"collection":5,"summary_md5":262,"cover_url":263,"cover_url_1_1":264},503,"632aae465ed77dc7bec8d6c70a325b4d","2022-08-13","f44d4b523ff110f3126ff57530ea5253","9057bfb73b448afaa099d71b91081fbf","article_res/cover/f0be439bba0d7592f5e206a775cb6d1d.jpeg","article_res/cover/d1cef2d2daf78358925bd16dd02f9cd8.jpeg",{"id":266,"title_md5":267,"publish_date":268,"author_md5":269,"is_original":23,"collection":270,"summary_md5":271,"cover_url":272,"cover_url_1_1":273},399,"ea200fc67f4264d2920caf55136495c6","2023-10-08","bc27fa490c4d0d525bac812fc0793534","#AI Image Generator #Tencent","e66cb4e964b30d415141b35935cf81e7","article_res/cover/d8df91af5daff4596d41f539900b2f11.jpeg","article_res/cover/4e52581af1766f866fc106997dadc216.jpeg",{"id":275,"title_md5":276,"publish_date":277,"author_md5":269,"is_original":4,"collection":5,"summary_md5":278,"cover_url":279,"cover_url_1_1":280},376,"2e4701866f530a76c32f6ca05911fd0a","2023-12-03","5ab517556027349e57e9ca00a6bd92fe","article_res/cover/e66fc91c4f2e9aa3aa0348853d115b5c.jpeg","article_res/cover/b2ba3e99fa01a7d190ae01f3591ed88a.jpeg",{"id":282,"title_md5":283,"publish_date":284,"author_md5":269,"is_original":23,"collection":285,"summary_md5":286,"cover_url":287,"cover_url_1_1":288},49,"7bd14f65c5c6320135ab1d43f1a3bc31","2025-02-04","#Snap #World Model #AI 3D Generator","e50459e4697aca5fc7c4e86c534ed173","article_res/cover/1f094b1c4b84e1f350f93b0a928a5e09.jpeg","article_res/cover/11c6d8e68b338c34b8d480889296649f.jpeg",{"id":290,"title_md5":291,"publish_date":292,"author_md5":269,"is_original":23,"collection":146,"summary_md5":293,"cover_url":294,"cover_url_1_1":295},280,"455091bdbdbee638b3a321df9edce155","2024-05-09","a795ddf3415ce755e488296baf27c81d","article_res/cover/be0bb529e4f8f58b671ad675b89e31ec.jpeg","article_res/cover/d9f9a24f6bf2aa07830b5997f128de0c.jpeg",{"id":297,"title_md5":298,"publish_date":299,"author_md5":300,"is_original":4,"collection":5,"summary_md5":301,"cover_url":302,"cover_url_1_1":303},534,"80e31f27ce48e2a3940fc59930bb7eef","2022-05-22","8b3607d0f4181a3cb6ffdccf7185f09b","8e7d5bfc6c8db879b1ab5ace9ffdc46d","article_res/cover/0ebfe9c490974ead7e3dfd58e9c6bb07.jpeg","article_res/cover/53265d43b8157200837007872bfbef0b.jpeg",{"id":305,"title_md5":306,"publish_date":307,"author_md5":308,"is_original":4,"collection":5,"summary_md5":309,"cover_url":310,"cover_url_1_1":311},419,"adccaea51546b404eb20ff934aba1e3c","2023-08-22","cfab1ba8c67c7c838db98d666f02a132","4db22b1a4bf939dbef2957fc5232e0e9","article_res/cover/0418afcee8ef2929f76d1437f08b164f.jpeg","article_res/cover/40a5325e953d7ffcfe5f2bde881f04ee.jpeg",{"id":313,"title_md5":314,"publish_date":315,"author_md5":269,"is_original":23,"collection":316,"summary_md5":317,"cover_url":318,"cover_url_1_1":319},183,"3f741220babab7d253633d147110226c","2024-09-03","#Meta","02d3cd60c5f2788d07872bf9c1e2a639","article_res/cover/1430550bbf1444488b94d767f1dd97c3.jpeg","article_res/cover/e06f0dabb08939da492aaaf263e5f07f.jpeg",{"related":321,"small":362},[322,330,338,346,354],{"id":323,"publish_date":324,"is_original":4,"collection":325,"cover_url":326,"cover_url_1_1":327,"title":328,"summary":329,"author":28},363,"2023-12-22","#AI Video Generator #Pika","article_res/cover/1ce241a951660e5459ed62b7343717cb.jpeg","article_res/cover/0c23549090b06b680881d757102710fc.jpeg","Pika AI Video Generation Whitelist Experience","Pika.art: An idea-to-video platform that brings your creativity to motion.",{"id":331,"publish_date":332,"is_original":23,"collection":333,"cover_url":334,"cover_url_1_1":335,"title":336,"summary":337,"author":28},60,"2025-01-24","#OpenAI #Operator #AI Agent #LLM #CUA","article_res/cover/213af3f4a84ff7ab8a2e8993c59d6273.jpeg","article_res/cover/4bf22a42693d5957057fe156fc1cfdbf.jpeg","OpenAI Releases New Features: Operator and CUA, Allowing AI to Handle Your Web Tasks","A research preview of an agent that can use its own browser to perform tasks for you.",{"id":339,"publish_date":340,"is_original":4,"collection":5,"cover_url":341,"cover_url_1_1":342,"title":343,"summary":344,"author":345},554,"2022-05-02","article_res/cover/5e0e1b6c5a43cc8e35ce6e45e8250f50.jpeg","article_res/cover/fbbab82bf840d89770767f59c7dd43b7.jpeg","See oneself as matter","Our task must be to widen the circle of compassion to embrace all living creatures and the beauty of nature in its entirety, and to free ourselves from this prison.","Excerpt",{"id":347,"publish_date":348,"is_original":4,"collection":349,"cover_url":350,"cover_url_1_1":351,"title":352,"summary":353,"author":28},425,"2023-08-04","#AI Agents #AI Game #AI Code Generator #AI Agent","article_res/cover/e364b56ed639fae29cdde957447165e4.jpeg","article_res/cover/3441f67d984d02d4373fbb45770a22f1.jpeg","ChatDev - AI Agents for Game Development","Collaboration allows us to know more than we are capable of knowing by ourselves.  - Paul Solarz",{"id":355,"publish_date":356,"is_original":4,"collection":65,"cover_url":357,"cover_url_1_1":358,"title":359,"summary":360,"author":361},559,"2022-04-27","article_res/cover/838866e5860d413ba612c26d877f0997.jpeg","article_res/cover/e18ed6814c52132af4db6b42cd7d83b9.jpeg","Business breakthrough of startups","Jump into the middle of things, get your hands dirty, fall flat on your face, and then reach for the stars.","Chat learning",[363,369,375],{"title":10,"list":364},[365,366,367,368],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":370},[371,372,373,374],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},{"title":242,"list":376},[],[8,9,10],[8,12,13,14,9,10,15,16,17,18],["Reactive",245],1754646410709]