[{"data":1,"prerenderedAt":379},["ShallowReactive",2],{"$fgukOamtKU1RtUiMFsqdObttmqPPQz0uc7bl_gj_LyX0":3,"$f7g848vSygr5TyT6YXQ9SzaIQ__Zxwxk-cq2I0UETCAU":245,"article-283":378},{"code":4,"msg":5,"data":6},0,"",{"category":7,"tag":11,"popular":19,"latest":86,"banner":126,"list":151,"cache":244},[8,9,10],"Agent","OpenAI","LLM",[8,12,13,14,9,10,15,16,17,18],"Google","Nvidia","Claude","DeepSeek","OCR","Chat","Generator",[20,29,37,45,54,62,70,79],{"id":21,"publish_date":22,"is_original":23,"collection":5,"cover_url":24,"cover_url_1_1":25,"title":26,"summary":27,"author":28},411,"2023-09-10",1,"article_res/cover/451ef50c225a8dc61c4336506794d13b.jpeg","article_res/cover/3ba9dc7a72f87d40b20fc2d225289ee3.jpeg","Idealism","Reality is created by the mind, we can change our reality by changing our mind. - Plato","Renee's Entrepreneurial Journey",{"id":30,"publish_date":31,"is_original":23,"collection":32,"cover_url":33,"cover_url_1_1":34,"title":35,"summary":36,"author":28},108,"2024-12-07","#LLM #AGI #AI Agent","article_res/cover/0039044422e4ec9f61c18e8ee1693bb0.jpeg","article_res/cover/4220971b108a91d21407d87bb02fbaa6.jpeg","Freysa.ai: The World's First Adversarial AI Agent Game","说服 Freysa 把钱包里的钱都拿出来",{"id":38,"publish_date":39,"is_original":23,"collection":40,"cover_url":41,"cover_url_1_1":42,"title":43,"summary":44,"author":28},12,"2025-03-09","#Oxford #Reasoning #LLM #Tool Use","article_res/cover/d448e9b3617a0b5302e1bd10c438bca9.jpeg","article_res/cover/864a468f9cc4c9317efadb3811909888.jpeg","Agentic Reasoning Framework - Significantly enhance the reasoning ability of LLMs through the integration of external tools using agents","Agentic Reasoning: Reasoning LLMs with Tools for Deep Research",{"id":46,"publish_date":47,"is_original":4,"collection":48,"cover_url":49,"cover_url_1_1":50,"title":51,"summary":52,"author":53},480,"2023-04-14","#Stable Diffusion","article_res/cover/0bdbe7cb1de4a78e54536e5d9afa7ec9.jpeg","article_res/cover/b3d6ffec0608dcfaf18c5a69906d1490.jpeg","【AIGC Learning】Generate Prompts Using Word Graphs - Stable Diffusion Web UI Series 13","AI will become a powerful tool in education, transforming the way we learn and deliver instruction.  \n- Reid Hoffman","--",{"id":55,"publish_date":56,"is_original":4,"collection":57,"cover_url":58,"cover_url_1_1":59,"title":60,"summary":61,"author":28},413,"2023-09-08","#Neuroscience","article_res/cover/74f8302d78a23d9430f22171eae136b6.jpeg","article_res/cover/87ca08af81bb304746be5261160964c0.jpeg","Can machines be conscious?","Do we have an ethical obligation to not turn off conscious machines? Would turning them off be murder? No. I don't lose any sleep over unplugging a conscious machine.\n- Jeff Hawkins, \"A Thousand Brains\"",{"id":63,"publish_date":64,"is_original":23,"collection":65,"cover_url":66,"cover_url_1_1":67,"title":68,"summary":69,"author":28},178,"2024-09-09","#Entrepreneurship","article_res/cover/a7224f025b55d1820408085faef63079.jpeg","article_res/cover/11a9995b096cbf64465ef01b8673b154.jpeg","37signals company","This damn sense of relaxation",{"id":71,"publish_date":72,"is_original":4,"collection":73,"cover_url":74,"cover_url_1_1":75,"title":76,"summary":77,"author":78},460,"2023-05-12","#Google","article_res/cover/b970687b12faa52da976f91248c2aa7b.jpeg","article_res/cover/d1e71b52cfd2c63bc6e71f3e85ff135c.jpeg","Learn what BRC-20 and Ordinals are using Google Bard","Ordinals - a new protocol that allows users to store arbitrary data on the Bitcoin blockchain","Google Bard mainly writes",{"id":80,"publish_date":81,"is_original":23,"collection":5,"cover_url":82,"cover_url_1_1":83,"title":84,"summary":85,"author":28},309,"2024-03-26","article_res/cover/9877f95894ee88532d0e6012c23a2df3.jpeg","article_res/cover/20092164ddc109ce6ae56b1984246751.jpeg","Learning the Cancun Upgrade with lepton and perplexity","Building a quick conversation-based search demo with Lepton AI.",[87,95,103,111,119],{"id":88,"publish_date":89,"is_original":23,"collection":90,"cover_url":91,"cover_url_1_1":92,"title":93,"summary":94,"author":28},627,"2025-03-20","#AI Avatar #AI Video Generation","article_res/cover/d95481358f73924989f8c4ee9c75d1c8.jpeg","article_res/cover/b74bc0fab01f8b6a6aa87696c0c3ed8b.jpeg","DisPose: Generating Animated Videos by Driving Video with Reference Images","DisPose is a controllable human image animation method that enhances video generation.",{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},626,"2025-03-21","#Deep Dive into LLMs #LLM #RL #Andrej Karpathy #AlphaGo","article_res/cover/446553a5c8f8f2f07d97b20eaee84e56.jpeg","article_res/cover/e6c2823409c9b34624064b9acbaca6f1.jpeg","AlphaGo and the Power of Reinforcement Learning - Andrej Karpathy's Deep Dive on LLMs (Part 9)","Simply learning from humans will never surpass human capabilities.",{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},625,"2025-03-22","#Deep Dive into LLMs #LLM #RL #RLHF #Andrej Karpathy","article_res/cover/8da81d38b1e5cf558a164710fd8a5389.jpeg","article_res/cover/96f028d76c362a99a0dd56389e8f7a9b.jpeg","Reinforcement Learning from Human Feedback (RLHF) - Andrej Karpathy's Deep Dive on LLMs (Part 10)","Fine-Tuning Language Models from Human Preferences",{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},624,"2025-03-23","#Deep Dive into LLMs #LLM #Andrej Karpathy #AI Agent #MMM","article_res/cover/a5e7c3d48bb09109684d6513287c661d.jpeg","article_res/cover/d3f22b7c0ab8d82fd2da457a299e0773.jpeg","The Future of Large Language Models - Andrej Karpathy's In-Depth Explanation of LLM (Part 11)","preview of things to come",{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},623,"#Google #Voe #AI Video Generation","article_res/cover/c44062fea0f336c2b96b3928292392c2.jpeg","article_res/cover/a041041c69092ad3db191c5bf3ff981b.jpeg","Trial of Google's video generation model VOE2","Our state-of-the-art video generation model",[127,135,143],{"id":128,"publish_date":129,"is_original":23,"collection":130,"cover_url":131,"cover_url_1_1":132,"title":133,"summary":134,"author":28},300,"2024-04-16","#AI in Science #AGI","article_res/cover/6bf01e793e0f33e848572412eebdf9b0.jpeg","article_res/cover/91a5ee21dafecb914fabeb9430d46ec1.jpeg","Would Einstein lose his job - AI and Quantum Computing: A Glimpse into the Near Future","So Einstein's job is still safe.",{"id":136,"publish_date":137,"is_original":23,"collection":138,"cover_url":139,"cover_url_1_1":140,"title":141,"summary":142,"author":28},101,"2024-12-14","#Nvidia #AI 3D Generator","article_res/cover/693e07c85980c5c0c8fde3f037733f23.jpeg","article_res/cover/9ea8edff2d5d303ff3fffff3f6f9c3d9.jpeg","NVIDIA's open-source 3D project LLaMA-Mesh","LLaMA-Mesh: Unifying 3D Mesh Generation with Language Models",{"id":144,"publish_date":145,"is_original":23,"collection":146,"cover_url":147,"cover_url_1_1":148,"title":149,"summary":150,"author":28},131,"2024-11-10","#OpenAI","article_res/cover/87f8ed353ce39f31960e7cdfaf075a35.jpeg","article_res/cover/f597a63935f5cd32e484b4aadd6019e8.jpeg","ChatGPT has launched the Search function","Get fast, timely answers with links to relevant web sources.",{"big":152,"small":214},[153,181],{"title":154,"list":155},"AGENT",[156,157,165,173],{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":158,"publish_date":159,"is_original":23,"collection":160,"cover_url":161,"cover_url_1_1":162,"title":163,"summary":164,"author":28},622,"2025-03-24","#OWL #AI Agent #MAS #MCP #CUA","article_res/cover/cb50ca7f2bf4d1ed50202d7406e1c19a.jpeg","article_res/cover/4aa7aa3badfacf3cc84121334f1050dd.jpeg","OWL: Multi-agent collaboration","OWL: Optimized Workforce Learning for General Multi-Agent Assistance in Real-World Task Automation",{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},620,"2025-03-26","#LLM #Google #Gemini #AI Agent","article_res/cover/53751a6dbbe990b1eb0b63f3b062aed4.jpeg","article_res/cover/031344981f0a212ff82d1f3a64aa5756.jpeg","Gemini 2.5 Pro, claimed to be far ahead of the competition, has been released with great fanfare: comprehensively surpassing other LLMs and topping the global rankings","Gemini 2.5: Our most intelligent AI model",{"id":174,"publish_date":175,"is_original":23,"collection":176,"cover_url":177,"cover_url_1_1":178,"title":179,"summary":180,"author":28},616,"2025-03-29","#MAS #AI Agent #AI Coder #MetaGPT #MGX","article_res/cover/9dcd702ad2035902e5e77967c34a1f1e.jpeg","article_res/cover/0a97fc4a922753c8f46ff38792020df8.jpeg","MGX - An automated website-building platform composed of multiple AI Agents","Your 24/7 AI Team | Dream, Chat, Create.",{"title":182,"list":183},"OPENAI",[184,191,199,206],{"id":185,"publish_date":167,"is_original":23,"collection":186,"cover_url":187,"cover_url_1_1":188,"title":189,"summary":190,"author":28},619,"#OpenAI #AI Image Generator #4o #MMM #AR Transformer","article_res/cover/2faffc97fcecf3151552cb0fd3206d89.jpeg","article_res/cover/1133cb4948af44cee2e7fbe79efb69e5.jpeg","The native image function of GPT-4o is officially launched","Introducing 4o Image Generation",{"id":192,"publish_date":193,"is_original":4,"collection":194,"cover_url":195,"cover_url_1_1":196,"title":197,"summary":198,"author":28},434,"2023-07-15","#Anthropic #OpenAI #Google #AI Code Generator #Claude","article_res/cover/e1b6f600a2b9f262a4392684e5f2ce25.jpeg","article_res/cover/6e1772e83f78f9a351ab23d3e414adee.jpeg","Latest Updates on Google Bard /Anthropic Claude2 / ChatGPT Code Interpreter","We want our models to use their programming skills to provide more natural interfaces to the basic functions of our computers.  \n - OpenAI",{"id":200,"publish_date":201,"is_original":4,"collection":146,"cover_url":202,"cover_url_1_1":203,"title":204,"summary":205,"author":28},417,"2023-08-24","article_res/cover/bccf897d50a88b18364e35f7466387e0.jpeg","article_res/cover/2f871085c1073717c1703ae86e18056f.jpeg","The GPT-3.5 Turbo fine-tuning (fine-tuning function) has been released～","Developers can now bring their own data to customize GPT-3.5 Turbo for their use cases.",{"id":207,"publish_date":208,"is_original":4,"collection":209,"cover_url":210,"cover_url_1_1":211,"title":212,"summary":213,"author":28},407,"2023-09-22","#OpenAI #AI Image Generator","article_res/cover/c59005e903d35cfc32346e2756e2728a.jpeg","article_res/cover/ba011d265e6d84b5c8cb6fd6b757b6cc.jpeg","Dall-E 3","DALL·E 3 understands significantly more nuance and detail, allowing you to easily translate your ideas into images.",[215,221,241],{"title":10,"list":216},[217,218,219,220],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":223},"GOOGLE",[224,225,226,234],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},615,"2025-03-30","#AI Researcher #AI Science #HKU #Google #AI Agent","article_res/cover/21fadf906067714bb0db31ae13a77c15.jpeg","article_res/cover/2697999a72bd26b22e85f0e92936d3ed.jpeg","AI-Researcher: LLM-driven全自动 scientific research assistant","AI-Researcher: Fully-Automated Scientific Discovery with LLM Agents  \nOpen-Sourced Alternative to Google AI Co-Scientist",{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},463,"2023-05-09","article_res/cover/89800f207723acdb55fc53bf999ebdc9.jpeg","article_res/cover/5764f369b4accd8f83e94aa4c077a175.jpeg","The Smallville sandbox world - A town with 25 virtual residents","Believable proxies of human behavior can empower interactive apps: Immersive environment, Rehearsal space, Prototyping tool",{"title":242,"list":243},"NVIDIA",[],true,{"code":4,"msg":5,"data":246},{"id":247,"publish_date":248,"is_original":23,"collection":249,"articles_id":250,"cover_url":251,"cover_url_1_1":252,"title":253,"summary":254,"author":28,"content":255,"popular":256,"list":321,"category":376,"tag":377},283,"2024-05-05","#AI Index Report 2024 #LLM #Alignment","iKe3w_3-v5gNDxaVPzw5-w","article_res/cover/6182044281bbccd7ee6db9e3e4c8783c.jpeg","article_res/cover/baeb687f1c5504562e618281de151def.jpeg","\"The 2024 Artificial Intelligence Index Report\" - 2.11 Characteristics of LLM","LLMs exhibit emergent abilities, meaning they can unpredictably and suddenly display new capabilities at larger scales.","\u003Cdiv class=\"rich_media_content js_underline_content\n                       autoTypeSetting24psection\n            \" id=\"js_content\">\u003Ch2 data-tool=\"mdnice编辑器\" style='margin-top: 30px;margin-bottom: 15px;font-weight: bold;font-size: 22px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;letter-spacing: normal;text-align: left;text-wrap: wrap;'>Emergent behavior - Actually not\u003C/h2>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>In Chapter 2.11 of the report, \"emergent behavior\" (Emergent Behavior) is discussed. Many research papers point out that large language models (LLMs) exhibit emergent capabilities, meaning they may unpredictably display new abilities as their scale increases. This has raised concerns that larger models might develop surprising, and potentially uncontrollable, new abilities.\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>However, Stanford University's research challenges this view, arguing that the emergence of new capabilities often reflects the evaluation benchmark rather than an inherent property of the model itself (this was also mentioned in a previous sharing by Professor Fei-Fei Li, which suggests that it's not the large model exhibiting emergent behavior, but rather our measurement capabilities have not kept up). Researchers found that when non-linear or discontinuous evaluation metrics (such as multiple-choice scoring) are used, the model's emergent capabilities appear more pronounced. Conversely, when linear or continuous metrics are used, most of these capabilities disappear. By analyzing a series of benchmarks from BIG-bench, a comprehensive LLM evaluation tool, researchers observed emergent capabilities in only 5 out of 39 benchmarks.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100004397\" data-ratio=\"0.7166666666666667\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"./assets/17423806007020.8090298002896306.png\">\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>These findings have significant implications for AI safety and alignment research, as they challenge a widely held belief that AI models will inevitably learn new, unpredictable behaviors during the scaling process.\u003C/p>\u003Ch2 data-tool=\"mdnice编辑器\" style='margin-top: 30px;margin-bottom: 15px;font-weight: bold;font-size: 22px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;letter-spacing: normal;text-align: left;text-wrap: wrap;'>Performance changes - Getting dumber\u003C/h2>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>Publicly available closed-source large language models (LLMs) like GPT-4, Claude 2, and Gemini are frequently updated by their developers based on new data or user feedback. However, there is little research on how the performance of these models changes after updates—if it changes at all.\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>A study conducted by Stanford University and UC Berkeley explored the performance changes over time of certain publicly available LLMs and highlighted that their performance can actually undergo significant fluctuations. Specifically, the study compared versions of GPT-3.5 and GPT-4 from March 2023 and June 2023, showing a decline in performance across multiple tasks (essentially becoming \"less smart\"). For instance, compared to the March version, the June version of GPT-4 saw a 42 percentage point drop in code generation, a 16 percentage point drop in answering sensitive questions, and a 33 percentage point drop in some math tasks.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100004398\" data-ratio=\"1.0314814814814814\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"./assets/17423806007050.7136610899370173.png\">\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>The researchers also found that GPT-4's ability to follow instructions weakened over time, which may explain the broader decline in performance. This study highlights how LLM performance can evolve over time and suggests that regular users should be aware of these changes.\u003C/p>\u003Ch2 data-tool=\"mdnice编辑器\" style='margin-top: 30px;margin-bottom: 15px;font-weight: bold;font-size: 22px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;letter-spacing: normal;text-align: left;text-wrap: wrap;'>Self-correction - Unlikely\u003C/h2>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>It is commonly believed that large language models like GPT-4 have limitations in reasoning and sometimes produce false hallucinated information. One potential solution to this issue is self-correction, where LLMs can identify and correct their own reasoning flaws. As AI plays an increasingly important role in society, the concept of intrinsic self-correction—allowing LLMs to autonomously correct reasoning errors without external guidance—is particularly appealing. However, it remains unclear whether LLMs truly possess this capability for self-correction.\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>Researchers from DeepMind and the University of Illinois Urbana-Champaign tested GPT-4's performance on three reasoning benchmarks: GSM8K (elementary mathematics), CommonSenseQA (commonsense reasoning), and HotpotQA (multi-document reasoning). They found that when the model decided by itself whether to self-correct without guidance, its performance dropped across all tested benchmarks.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100004399\" data-ratio=\"0.5675925925925925\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"./assets/17423806007030.5599019785270001.png\">\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>This research is akin to watching LLMs perform a high-wire act without a safety net. The results show that without external guidance and support, these models struggle to self-correct their paths and may instead veer further off course. These findings pose new challenges for AI development and application, suggesting that we may still need more external checks and balancing mechanisms when designing and implementing AI technologies.\u003C/p>\u003Ch2 data-tool=\"mdnice编辑器\" style='margin-top: 30px;margin-bottom: 15px;font-weight: bold;font-size: 22px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;letter-spacing: normal;text-align: left;text-wrap: wrap;'>Open Source vs Closed Source - Closed Source Wins Big\u003C/h2>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>There is a significant performance gap between open-source and closed-source models. A comparison was made between the top closed-source models and open-source models across a series of benchmarks. In all selected benchmarks, closed-source models outperformed open-source models.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100004401\" data-ratio=\"0.3972222222222222\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"./assets/17423806007040.09858779304343135.png\">\u003C/p>\u003Cp data-tool=\"mdnice编辑器\" style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;color: rgb(0, 0, 0);font-family: Optima-Regular, Optima, PingFangSC-light, PingFangTC-light, \"PingFang SC\", Cambria, Cochin, Georgia, Times, \"Times New Roman\", serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;line-height: 26px;'>Specifically, across 10 selected benchmarks, the median performance advantage of closed-source models was 24.2%, with differences ranging from 4.0% in mathematical tasks like GSM8K to 317.7% in agent tasks like AgentBench.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img\" data-galleryid=\"\" data-imgfileid=\"100004400\" data-ratio=\"0.5666666666666667\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"./assets/17423806007740.8979997451538482.png\">\u003C/p>\u003Cp style=\"display: none;\">\u003Cmp-style-type data-value=\"3\">\u003C/mp-style-type>\u003C/p>\u003C/div>",[257,266,274,282,290,298,306,313],{"id":258,"title_md5":259,"publish_date":260,"author_md5":261,"is_original":23,"collection":262,"summary_md5":263,"cover_url":264,"cover_url_1_1":265},203,"2c408d886a6f7344e88f7b1de0d5fd86","2024-08-06","bc27fa490c4d0d525bac812fc0793534","#AI 3D Generator","85c753f4be6c3938c4a4916d348142ab","article_res/cover/68382532dcde38141c8b44efe285413d.jpeg","article_res/cover/4028f0d69e23efa6431ffcfeea9e65a3.jpeg",{"id":267,"title_md5":268,"publish_date":269,"author_md5":261,"is_original":23,"collection":270,"summary_md5":271,"cover_url":272,"cover_url_1_1":273},52,"d0d26bb4d047a4b9404df5e5ce05e7c8","2025-02-01","#CUA #AI Agent","4963841a32d40a40e795354a44fcf8dc","article_res/cover/4cdd14a75ebd0f2a2a0e5ac6255b1245.jpeg","article_res/cover/5e1606775fe81141ef0642d98338ea31.jpeg",{"id":275,"title_md5":276,"publish_date":277,"author_md5":261,"is_original":23,"collection":278,"summary_md5":279,"cover_url":280,"cover_url_1_1":281},364,"a4a767142560868dc550c4145304e6b9","2023-12-21","#AI Video Generator #Google","c1e5514264408f82b1c335e48c0a40e3","article_res/cover/b79fb183e41ce8c75e8a684504af6ceb.jpeg","article_res/cover/7ba72efab23570cfe245da5e1536631e.jpeg",{"id":283,"title_md5":284,"publish_date":285,"author_md5":261,"is_original":23,"collection":286,"summary_md5":287,"cover_url":288,"cover_url_1_1":289},307,"1ca00695e330439c25eb19f7afac5026","2024-03-28","#LLM","a3b47e83e5d6f8bcceeaaf4d6a3a070e","article_res/cover/27c22d877aa4d827c74730a40932ef64.jpeg","article_res/cover/b925b57062ba315da3067ee08426cf12.jpeg",{"id":291,"title_md5":292,"publish_date":293,"author_md5":261,"is_original":4,"collection":294,"summary_md5":295,"cover_url":296,"cover_url_1_1":297},415,"2bba3baeedc50d897719456e5fe09b57","2023-09-06","#Meta #AI Code Generator","04196f015b8ed5413f2d3530cb358eac","article_res/cover/39946e0375149e1b7bc4a43042b1ec80.jpeg","article_res/cover/317a03186003d1b852b7b10c04b8c214.jpeg",{"id":299,"title_md5":300,"publish_date":301,"author_md5":261,"is_original":23,"collection":302,"summary_md5":303,"cover_url":304,"cover_url_1_1":305},420,"37ffa1cfdcda14d3e2da3b5dcdb79075","2023-08-13","#AI Code Generator","b05b4e06ffbe5a05f2dc4727c0f760b7","article_res/cover/8c73b2544d48b212f06f0c40b0718db5.jpeg","article_res/cover/2c471b9e0a78072bbcd4cb89bad48a3f.jpeg",{"id":307,"title_md5":308,"publish_date":309,"author_md5":261,"is_original":23,"collection":262,"summary_md5":310,"cover_url":311,"cover_url_1_1":312},172,"3daaea69691e09c5f8af7df545501d11","2024-09-20","c6eed6b9a6da6ba50ed82f65ac6455da","article_res/cover/bdebefa89e9ad647cade3cefe41d9e43.jpeg","article_res/cover/0525bd5f2d8b4587bbc98fe43e783af3.jpeg",{"id":314,"title_md5":315,"publish_date":316,"author_md5":261,"is_original":23,"collection":317,"summary_md5":318,"cover_url":319,"cover_url_1_1":320},135,"09dcbb28b3509bdf4bb0b4d1b2ddc824","2024-11-06","#Philosophy #Psychology","be4b41e96921b085fd942cc3d8869d06","article_res/cover/89c64742d51a90a947f4329bb25d9ea6.jpeg","article_res/cover/7c0d4dae638efc6c73a6ae53a8ed1c58.jpeg",{"related":322,"small":361},[323,331,338,346,353],{"id":324,"publish_date":325,"is_original":4,"collection":5,"cover_url":326,"cover_url_1_1":327,"title":328,"summary":329,"author":330},569,"2022-04-17","article_res/cover/b03629a77c01b8dc880bc860c977a3f0.jpeg","article_res/cover/c08a01bf1570dd525fa626a805201521.jpeg","Summary of \"Prosperity and Decline\": The Evolution of American Business Forms in the Early 20th Century","In the past, human beings were the first productive force; in the future, systems must become the first productive force. - Frederick Taylor, \"The Principles of Scientific Management\"","Book Summary",{"id":332,"publish_date":333,"is_original":23,"collection":262,"cover_url":334,"cover_url_1_1":335,"title":336,"summary":337,"author":28},291,"2024-04-27","article_res/cover/da746312f3858a01f966e1c17ad1f9a3.jpeg","article_res/cover/53d099e12e83ee7f5c777e5320cbd173.jpeg","AI Generate 3D - AIUNI and Polycam trials","We believe that 3D capture is for everyone, so we made it easy. - Polycam",{"id":339,"publish_date":340,"is_original":4,"collection":5,"cover_url":341,"cover_url_1_1":342,"title":343,"summary":344,"author":345},610,"2022-03-07","article_res/cover/e0d64dda34b8acc71583ce0ffdfcb430.jpeg","article_res/cover/5e763f310a559b989f929e0cee5d4b37.jpeg","[Excerpt of Opinions] Tim Ferriss Show #542: Chris Dixon and Naval Ravikant (3/4)","The wonder of Web3, How to pick the right Hill to Climb","Tim Ferriss Excerpts",{"id":347,"publish_date":348,"is_original":4,"collection":5,"cover_url":349,"cover_url_1_1":350,"title":351,"summary":352,"author":53},360,"2023-12-26","article_res/cover/2a0a20d5a76c6b4451e8854f6ba6bd09.jpeg","article_res/cover/a2ccc5a767c55bff6c4dd1dfdf83f2c1.jpeg","Key Trends in \"CRYPTO THESES 2024\"","If last year’s report was “It’s So Over,” this year’s report is “We’re So Back.”",{"id":354,"publish_date":355,"is_original":23,"collection":356,"cover_url":357,"cover_url_1_1":358,"title":359,"summary":360,"author":28},237,"2024-06-23","#AI Grant","article_res/cover/b1b8319c2d88f77217bf268133c0b58d.jpeg","article_res/cover/4c0f58fe75adf80ec93a69406fb5b234.jpeg","AI Grant Project List - Batch 1","Check out the thread to learn more about the companies",[362,368,374],{"title":10,"list":363},[364,365,366,367],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":369},[370,371,372,373],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},{"title":242,"list":375},[],[8,9,10],[8,12,13,14,9,10,15,16,17,18],["Reactive",245],1754646419803]