[{"data":1,"prerenderedAt":376},["ShallowReactive",2],{"$fgukOamtKU1RtUiMFsqdObttmqPPQz0uc7bl_gj_LyX0":3,"$fmIumGsXX2blDjQscYf0820ki7Ut4t4VG4_flhB1CSUU":245,"article-395":375},{"code":4,"msg":5,"data":6},0,"",{"category":7,"tag":11,"popular":19,"latest":86,"banner":126,"list":151,"cache":244},[8,9,10],"Agent","OpenAI","LLM",[8,12,13,14,9,10,15,16,17,18],"Google","Nvidia","Claude","DeepSeek","OCR","Chat","Generator",[20,29,37,45,54,62,70,79],{"id":21,"publish_date":22,"is_original":23,"collection":5,"cover_url":24,"cover_url_1_1":25,"title":26,"summary":27,"author":28},411,"2023-09-10",1,"article_res/cover/451ef50c225a8dc61c4336506794d13b.jpeg","article_res/cover/3ba9dc7a72f87d40b20fc2d225289ee3.jpeg","Idealism","Reality is created by the mind, we can change our reality by changing our mind. - Plato","Renee's Entrepreneurial Journey",{"id":30,"publish_date":31,"is_original":23,"collection":32,"cover_url":33,"cover_url_1_1":34,"title":35,"summary":36,"author":28},108,"2024-12-07","#LLM #AGI #AI Agent","article_res/cover/0039044422e4ec9f61c18e8ee1693bb0.jpeg","article_res/cover/4220971b108a91d21407d87bb02fbaa6.jpeg","Freysa.ai: The World's First Adversarial AI Agent Game","说服 Freysa 把钱包里的钱都拿出来",{"id":38,"publish_date":39,"is_original":23,"collection":40,"cover_url":41,"cover_url_1_1":42,"title":43,"summary":44,"author":28},12,"2025-03-09","#Oxford #Reasoning #LLM #Tool Use","article_res/cover/d448e9b3617a0b5302e1bd10c438bca9.jpeg","article_res/cover/864a468f9cc4c9317efadb3811909888.jpeg","Agentic Reasoning Framework - Significantly enhance the reasoning ability of LLMs through the integration of external tools using agents","Agentic Reasoning: Reasoning LLMs with Tools for Deep Research",{"id":46,"publish_date":47,"is_original":4,"collection":48,"cover_url":49,"cover_url_1_1":50,"title":51,"summary":52,"author":53},480,"2023-04-14","#Stable Diffusion","article_res/cover/0bdbe7cb1de4a78e54536e5d9afa7ec9.jpeg","article_res/cover/b3d6ffec0608dcfaf18c5a69906d1490.jpeg","【AIGC Learning】Generate Prompts Using Word Graphs - Stable Diffusion Web UI Series 13","AI will become a powerful tool in education, transforming the way we learn and deliver instruction.  \n- Reid Hoffman","--",{"id":55,"publish_date":56,"is_original":4,"collection":57,"cover_url":58,"cover_url_1_1":59,"title":60,"summary":61,"author":28},413,"2023-09-08","#Neuroscience","article_res/cover/74f8302d78a23d9430f22171eae136b6.jpeg","article_res/cover/87ca08af81bb304746be5261160964c0.jpeg","Can machines be conscious?","Do we have an ethical obligation to not turn off conscious machines? Would turning them off be murder? No. 
I don't lose any sleep over unplugging a conscious machine.\n- Jeff Hawkins, \"A Thousand Brains\"",{"id":63,"publish_date":64,"is_original":23,"collection":65,"cover_url":66,"cover_url_1_1":67,"title":68,"summary":69,"author":28},178,"2024-09-09","#Entrepreneurship","article_res/cover/a7224f025b55d1820408085faef63079.jpeg","article_res/cover/11a9995b096cbf64465ef01b8673b154.jpeg","37signals company","This damn sense of relaxation",{"id":71,"publish_date":72,"is_original":4,"collection":73,"cover_url":74,"cover_url_1_1":75,"title":76,"summary":77,"author":78},460,"2023-05-12","#Google","article_res/cover/b970687b12faa52da976f91248c2aa7b.jpeg","article_res/cover/d1e71b52cfd2c63bc6e71f3e85ff135c.jpeg","Learn what BRC-20 and Ordinals are using Google Bard","Ordinals - a new protocol that allows users to store arbitrary data on the Bitcoin blockchain","Google Bard mainly writes",{"id":80,"publish_date":81,"is_original":23,"collection":5,"cover_url":82,"cover_url_1_1":83,"title":84,"summary":85,"author":28},309,"2024-03-26","article_res/cover/9877f95894ee88532d0e6012c23a2df3.jpeg","article_res/cover/20092164ddc109ce6ae56b1984246751.jpeg","Learning the Cancun Upgrade with lepton and perplexity","Building a quick conversation-based search demo with Lepton AI.",[87,95,103,111,119],{"id":88,"publish_date":89,"is_original":23,"collection":90,"cover_url":91,"cover_url_1_1":92,"title":93,"summary":94,"author":28},627,"2025-03-20","#AI Avatar #AI Video Generation","article_res/cover/d95481358f73924989f8c4ee9c75d1c8.jpeg","article_res/cover/b74bc0fab01f8b6a6aa87696c0c3ed8b.jpeg","DisPose: Generating Animated Videos by Driving Video with Reference Images","DisPose is a controllable human image animation method that enhances video generation.",{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},626,"2025-03-21","#Deep Dive into LLMs #LLM #RL #Andrej Karpathy #AlphaGo","article_res/cover/446553a5c8f8f2f07d97b20eaee84e56.jpeg","article_res/cover/e6c2823409c9b34624064b9acbaca6f1.jpeg","AlphaGo and the Power of Reinforcement Learning - Andrej Karpathy's Deep Dive on LLMs (Part 9)","Simply learning from humans will never surpass human capabilities.",{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},625,"2025-03-22","#Deep Dive into LLMs #LLM #RL #RLHF #Andrej Karpathy","article_res/cover/8da81d38b1e5cf558a164710fd8a5389.jpeg","article_res/cover/96f028d76c362a99a0dd56389e8f7a9b.jpeg","Reinforcement Learning from Human Feedback (RLHF) - Andrej Karpathy's Deep Dive on LLMs (Part 10)","Fine-Tuning Language Models from Human Preferences",{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},624,"2025-03-23","#Deep Dive into LLMs #LLM #Andrej Karpathy #AI Agent #MMM","article_res/cover/a5e7c3d48bb09109684d6513287c661d.jpeg","article_res/cover/d3f22b7c0ab8d82fd2da457a299e0773.jpeg","The Future of Large Language Models - Andrej Karpathy's In-Depth Explanation of LLM (Part 11)","preview of things to come",{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},623,"#Google #Voe #AI Video Generation","article_res/cover/c44062fea0f336c2b96b3928292392c2.jpeg","article_res/cover/a041041c69092ad3db191c5bf3ff981b.jpeg","Trial of Google's video generation model VOE2","Our state-of-the-art video 
generation model",[127,135,143],{"id":128,"publish_date":129,"is_original":23,"collection":130,"cover_url":131,"cover_url_1_1":132,"title":133,"summary":134,"author":28},300,"2024-04-16","#AI in Science #AGI","article_res/cover/6bf01e793e0f33e848572412eebdf9b0.jpeg","article_res/cover/91a5ee21dafecb914fabeb9430d46ec1.jpeg","Would Einstein lose his job - AI and Quantum Computing: A Glimpse into the Near Future","So Einstein's job is still safe.",{"id":136,"publish_date":137,"is_original":23,"collection":138,"cover_url":139,"cover_url_1_1":140,"title":141,"summary":142,"author":28},101,"2024-12-14","#Nvidia #AI 3D Generator","article_res/cover/693e07c85980c5c0c8fde3f037733f23.jpeg","article_res/cover/9ea8edff2d5d303ff3fffff3f6f9c3d9.jpeg","NVIDIA's open-source 3D project LLaMA-Mesh","LLaMA-Mesh: Unifying 3D Mesh Generation with Language Models",{"id":144,"publish_date":145,"is_original":23,"collection":146,"cover_url":147,"cover_url_1_1":148,"title":149,"summary":150,"author":28},131,"2024-11-10","#OpenAI","article_res/cover/87f8ed353ce39f31960e7cdfaf075a35.jpeg","article_res/cover/f597a63935f5cd32e484b4aadd6019e8.jpeg","ChatGPT has launched the Search function","Get fast, timely answers with links to relevant web sources.",{"big":152,"small":214},[153,181],{"title":154,"list":155},"AGENT",[156,157,165,173],{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":158,"publish_date":159,"is_original":23,"collection":160,"cover_url":161,"cover_url_1_1":162,"title":163,"summary":164,"author":28},622,"2025-03-24","#OWL #AI Agent #MAS #MCP #CUA","article_res/cover/cb50ca7f2bf4d1ed50202d7406e1c19a.jpeg","article_res/cover/4aa7aa3badfacf3cc84121334f1050dd.jpeg","OWL: Multi-agent collaboration","OWL: Optimized Workforce Learning for General Multi-Agent Assistance in Real-World Task Automation",{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},620,"2025-03-26","#LLM #Google #Gemini #AI Agent","article_res/cover/53751a6dbbe990b1eb0b63f3b062aed4.jpeg","article_res/cover/031344981f0a212ff82d1f3a64aa5756.jpeg","Gemini 2.5 Pro, claimed to be far ahead of the competition, has been released with great fanfare: comprehensively surpassing other LLMs and topping the global rankings","Gemini 2.5: Our most intelligent AI model",{"id":174,"publish_date":175,"is_original":23,"collection":176,"cover_url":177,"cover_url_1_1":178,"title":179,"summary":180,"author":28},616,"2025-03-29","#MAS #AI Agent #AI Coder #MetaGPT #MGX","article_res/cover/9dcd702ad2035902e5e77967c34a1f1e.jpeg","article_res/cover/0a97fc4a922753c8f46ff38792020df8.jpeg","MGX - An automated website-building platform composed of multiple AI Agents","Your 24/7 AI Team | Dream, Chat, Create.",{"title":182,"list":183},"OPENAI",[184,191,199,206],{"id":185,"publish_date":167,"is_original":23,"collection":186,"cover_url":187,"cover_url_1_1":188,"title":189,"summary":190,"author":28},619,"#OpenAI #AI Image Generator #4o #MMM #AR Transformer","article_res/cover/2faffc97fcecf3151552cb0fd3206d89.jpeg","article_res/cover/1133cb4948af44cee2e7fbe79efb69e5.jpeg","The native image function of GPT-4o is officially launched","Introducing 4o Image Generation",{"id":192,"publish_date":193,"is_original":4,"collection":194,"cover_url":195,"cover_url_1_1":196,"title":197,"summary":198,"author":28},434,"2023-07-15","#Anthropic #OpenAI #Google #AI Code Generator 
#Claude","article_res/cover/e1b6f600a2b9f262a4392684e5f2ce25.jpeg","article_res/cover/6e1772e83f78f9a351ab23d3e414adee.jpeg","Latest Updates on Google Bard /Anthropic Claude2 / ChatGPT Code Interpreter","We want our models to use their programming skills to provide more natural interfaces to the basic functions of our computers.  \n - OpenAI",{"id":200,"publish_date":201,"is_original":4,"collection":146,"cover_url":202,"cover_url_1_1":203,"title":204,"summary":205,"author":28},417,"2023-08-24","article_res/cover/bccf897d50a88b18364e35f7466387e0.jpeg","article_res/cover/2f871085c1073717c1703ae86e18056f.jpeg","The GPT-3.5 Turbo fine-tuning (fine-tuning function) has been released～","Developers can now bring their own data to customize GPT-3.5 Turbo for their use cases.",{"id":207,"publish_date":208,"is_original":4,"collection":209,"cover_url":210,"cover_url_1_1":211,"title":212,"summary":213,"author":28},407,"2023-09-22","#OpenAI #AI Image Generator","article_res/cover/c59005e903d35cfc32346e2756e2728a.jpeg","article_res/cover/ba011d265e6d84b5c8cb6fd6b757b6cc.jpeg","Dall-E 3","DALL·E 3 understands significantly more nuance and detail, allowing you to easily translate your ideas into images.",[215,221,241],{"title":10,"list":216},[217,218,219,220],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":223},"GOOGLE",[224,225,226,234],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},615,"2025-03-30","#AI Researcher #AI Science #HKU #Google #AI Agent","article_res/cover/21fadf906067714bb0db31ae13a77c15.jpeg","article_res/cover/2697999a72bd26b22e85f0e92936d3ed.jpeg","AI-Researcher: LLM-driven全自动 scientific research assistant","AI-Researcher: Fully-Automated Scientific Discovery with LLM Agents  \nOpen-Sourced Alternative to Google AI Co-Scientist",{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},463,"2023-05-09","article_res/cover/89800f207723acdb55fc53bf999ebdc9.jpeg","article_res/cover/5764f369b4accd8f83e94aa4c077a175.jpeg","The Smallville sandbox world - A town with 25 virtual residents","Believable proxies of human behavior can empower interactive apps: Immersive environment, Rehearsal space, Prototyping tool",{"title":242,"list":243},"NVIDIA",[],true,{"code":4,"msg":5,"data":246},{"id":247,"publish_date":248,"is_original":4,"collection":249,"articles_id":250,"cover_url":251,"cover_url_1_1":252,"title":253,"summary":254,"author":28,"content":255,"popular":256,"list":317,"category":373,"tag":374},395,"2023-10-12","#OpenAI #Object 
Detection","wKkuOa3gN0zFfxwWyJwByw","article_res/cover/f8c5b597c95350dbc7e6de1c3eee53f3.jpeg","article_res/cover/6e0dcb7df22b801f602bdba9aaf25e0d.jpeg","ChatGPT's application capabilities in the visual domain - Advanced Level 2","Show ChatGPT one or more images.","\u003Cdiv class=\"rich_media_content js_underline_content\n                       autoTypeSetting24psection\n            \" id=\"js_content\">\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>), yesterday we covered input methods and operational techniques. Today, continuing from yesterday's sharing, let's explore the boundaries of GPT-4's visual language capabilities, as her abilities are quite strong, so we will be learning over two days.\u003C/p>\u003Ch2 data-tool=\"mdnice编辑器\" style='margin-top: 30px;margin-bottom: 15px;font-weight: bold;font-size: 22px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;letter-spacing: normal;text-align: left;text-wrap: wrap;'>GPT-4's Visual Language Capabilities (Part 1)\u003C/h2>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>Following yesterday's sharing, let's take a look at GPT-4's performance in terms of visual language capabilities:\u003C/p>\u003Ch3 data-tool=\"mdnice编辑器\" style='margin-top: 30px;margin-bottom: 15px;font-weight: bold;font-size: 20px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;letter-spacing: normal;text-align: left;text-wrap: wrap;'>Image descriptions from different domains\u003C/h3>\u003Col data-tool=\"mdnice编辑器\" class=\"list-paddingleft-1\" style='margin-top: 8px;margin-bottom: 8px;padding-left: 25px;width: 557.438px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;'>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">: GPT-4 can understand a scene where the current U.S. President delivers a speech at the 2023 G7 Summit. 
## GPT-4's Visual Language Capabilities (Part 1)

Following yesterday's sharing, let's take a look at GPT-4's performance in terms of visual language capabilities.

### Image descriptions from different domains

1. GPT-4 can understand a scene in which the current U.S. President delivers a speech at the 2023 G7 Summit. This demonstrates the model's ability to generalize and handle new scenarios, such as the 2023 G7 Summit, even though this specific event was not part of its training data.
2. GPT-4 can accurately identify the Space Needle in Seattle, Washington, knows that it was built for the 1962 World's Fair, and knows that it has since become an icon of the city.
3. GPT-4 can capture complex details in images, enabling it to identify specific ingredients, garnishes, or cooking techniques in dishes.
4. GPT-4 can recognize common medical conditions, such as Jones fractures.
5. GPT-4 can describe novel or emerging logos and icons, such as the recently released Microsoft 365 Copilot.
6. GPT-4 can describe roads as well as the positions and colors of vehicles; it can also read road signs and note the speed limit for that road.
7. GPT-4 can correctly describe the content of an image even when faced with misleading questions or instructions.

### Object localization, counting, and dense captioning

1. GPT-4 can identify the spatial relationships between people and cars in an image and point out that the camera angle may affect their perceived size.
2. GPT-4 can count the number of objects present in an image.
3. GPT-4 can generate bounding box coordinates in text format, without a separate box-marking mechanism (a parsing sketch follows this list).
4. GPT-4 can locate and identify individuals in an image and then provide concise descriptions of each of them.
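Because such boxes come back as plain text, downstream use requires parsing them. A minimal sketch, assuming the prompt asked for boxes in the form "label: (x1, y1, x2, y2)"; the reply string and coordinate convention are illustrative assumptions, not a fixed output contract of the model.

```python
import re

# Hypothetical model reply; the exact format depends on what the prompt requested.
reply = "person: (0.12, 0.30, 0.45, 0.95); car: (0.50, 0.55, 0.98, 0.90)"

# Match "label: (x1, y1, x2, y2)" groups anywhere in the text.
box_pattern = re.compile(
    r"(?P<label>\w+):\s*\((?P<x1>[\d.]+),\s*(?P<y1>[\d.]+),\s*(?P<x2>[\d.]+),\s*(?P<y2>[\d.]+)\)"
)

for m in box_pattern.finditer(reply):
    label = m.group("label")
    x1, y1, x2, y2 = (float(m.group(k)) for k in ("x1", "y1", "x2", "y2"))
    print(f"{label}: top-left=({x1}, {y1}) bottom-right=({x2}, {y2})")
```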
src=\"./assets/17423814080760.25427313852140254.png\">\u003C/p>\u003Cp>\u003Cbr>\u003C/p>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">: GPT-4 can successfully locate and identify individuals in images, then provide concise descriptions of the individuals in the image.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-ratio=\"1.029739776951673\" data-s=\"300,640\" data-type=\"png\" data-w=\"538\" src=\"./assets/17423814094290.13195420800263147.png\">\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-ratio=\"0.8\" data-s=\"300,640\" data-type=\"png\" data-w=\"810\" style=\"\" src=\"./assets/17423814097840.7589627140703867.png\">\u003C/p>\u003Cp>\u003Cbr>\u003C/p>\u003Cp>\u003Cbr>\u003C/p>\u003C/section>\u003C/li>\u003C/ol>\u003Ch3 data-tool=\"mdnice编辑器\" style='margin-top: 30px;margin-bottom: 15px;font-weight: bold;font-size: 20px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;letter-spacing: normal;text-align: left;text-wrap: wrap;'>Multimodal Knowledge, Common Sense\u003C/h3>\u003Col data-tool=\"mdnice编辑器\" class=\"list-paddingleft-1\" style='margin-top: 8px;margin-bottom: 8px;padding-left: 25px;width: 557.438px;color: black;font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;'>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">: GPT-4 has the remarkable ability to gather information from visual and text modes, then understand the humor embedded in MEMEs.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-ratio=\"1.425531914893617\" data-s=\"300,640\" data-type=\"png\" data-w=\"658\" style=\"\" src=\"./assets/17423814129260.6682803897405134.png\">\u003C/p>\u003Cp>\u003Cbr>\u003C/p>\u003C/section>\u003C/li>\u003Cli>\u003Csection style=\"margin-top: 5px;margin-bottom: 5px;line-height: 26px;color: rgb(1, 1, 1);\">\u003Cp style=\"padding-top: 8px;padding-bottom: 8px;line-height: 26px;color: black;\">: GPT-4 can identify the average particle velocity of Sample A and Sample B. 
In response to the question, we observe that the generated answer adopts a tutorial format and explains the topic step by step.

3. Based on the formal dresses worn by [person1] and [person2] and the floral decorations present in the scene, it can be inferred that they are attending a wedding.

When prompted with the question, GPT-4V distinguished numerous subtle visual clues in the image and provided a list of reasonable assumptions.

Summary: GPT-4 performs excellently in visual language tasks, showing strong understanding and processing ability in image description across a wide range of domains. It also excels at spatial relationship understanding, object counting, object localization, and dense captioning. Moreover, GPT-4 demonstrates remarkable abilities in multimodal knowledge and common sense.
\"Beginner's Guide\"","The reaction of weak management to weak operations is often weak accounting.","Course notes",{"id":328,"publish_date":329,"is_original":4,"collection":5,"cover_url":330,"cover_url_1_1":331,"title":332,"summary":333,"author":28},349,"2024-01-04","article_res/cover/0c7bf8386a5c585338c8469f5c38c559.jpeg","article_res/cover/8faed64b23d5026b6633789da61e701c.jpeg","\"CRYPTO THESES 2024\" on CeFi Trends (Part 2)","If we see a sustained price rally, Wall Street will go shopping.",{"id":335,"publish_date":336,"is_original":23,"collection":337,"cover_url":338,"cover_url_1_1":339,"title":340,"summary":341,"author":28},106,"2024-12-09","#AI 3D Generator #World Model","article_res/cover/30b5113d0c0ab7ca89b737e3b7d4c5dc.jpeg","article_res/cover/a289285a914452f0d3295fec320267ae.jpeg","The First Step of Li Feifei's Lab in Exploring Spatial Intelligence: Generating a 3D World from a Single Image","生成世界",{"id":343,"publish_date":344,"is_original":23,"collection":345,"cover_url":346,"cover_url_1_1":347,"title":348,"summary":349,"author":28},76,"2025-01-07","#AI Video Generation #Google","article_res/cover/3613b008d99e99c267bce18c2c7f4003.jpeg","article_res/cover/055bce99535eb3d30085f963064e930a.jpeg","Vision Transformer (ViT)","An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale",{"id":351,"publish_date":352,"is_original":23,"collection":353,"cover_url":354,"cover_url_1_1":355,"title":356,"summary":357,"author":28},71,"2025-01-14","#Nvidia #World Foundation Model #Cosmos #Physical AI #Embodied AI","article_res/cover/feddf8c832dfb45d28804291f6a42a9e.jpeg","article_res/cover/d6bc2f1186d96b78228c2283a17a3645.jpeg","NVIDIA's Cosmos World Model","Cosmos World Foundation Model Platform for Physical AI",[359,365,371],{"title":10,"list":360},[361,362,363,364],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":366},[367,368,369,370],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},{"title":242,"list":372},[],[8,9,10],[8,12,13,14,9,10,15,16,17,18],["Reactive",245],1754646410281]