[{"data":1,"prerenderedAt":380},["ShallowReactive",2],{"$fgukOamtKU1RtUiMFsqdObttmqPPQz0uc7bl_gj_LyX0":3,"$f1SS9xTsTDQKtWRuMmj0c06s6LA9NkU63tWs_XYaiQyY":245,"article-149":379},{"code":4,"msg":5,"data":6},0,"",{"category":7,"tag":11,"popular":19,"latest":86,"banner":126,"list":151,"cache":244},[8,9,10],"Agent","OpenAI","LLM",[8,12,13,14,9,10,15,16,17,18],"Google","Nvidia","Claude","DeepSeek","OCR","Chat","Generator",[20,29,37,45,54,62,70,79],{"id":21,"publish_date":22,"is_original":23,"collection":5,"cover_url":24,"cover_url_1_1":25,"title":26,"summary":27,"author":28},411,"2023-09-10",1,"article_res/cover/451ef50c225a8dc61c4336506794d13b.jpeg","article_res/cover/3ba9dc7a72f87d40b20fc2d225289ee3.jpeg","Idealism","Reality is created by the mind, we can change our reality by changing our mind. - Plato","Renee's Entrepreneurial Journey",{"id":30,"publish_date":31,"is_original":23,"collection":32,"cover_url":33,"cover_url_1_1":34,"title":35,"summary":36,"author":28},108,"2024-12-07","#LLM #AGI #AI Agent","article_res/cover/0039044422e4ec9f61c18e8ee1693bb0.jpeg","article_res/cover/4220971b108a91d21407d87bb02fbaa6.jpeg","Freysa.ai: The World's First Adversarial AI Agent Game","说服 Freysa 把钱包里的钱都拿出来",{"id":38,"publish_date":39,"is_original":23,"collection":40,"cover_url":41,"cover_url_1_1":42,"title":43,"summary":44,"author":28},12,"2025-03-09","#Oxford #Reasoning #LLM #Tool Use","article_res/cover/d448e9b3617a0b5302e1bd10c438bca9.jpeg","article_res/cover/864a468f9cc4c9317efadb3811909888.jpeg","Agentic Reasoning Framework - Significantly enhance the reasoning ability of LLMs through the integration of external tools using agents","Agentic Reasoning: Reasoning LLMs with Tools for Deep Research",{"id":46,"publish_date":47,"is_original":4,"collection":48,"cover_url":49,"cover_url_1_1":50,"title":51,"summary":52,"author":53},480,"2023-04-14","#Stable Diffusion","article_res/cover/0bdbe7cb1de4a78e54536e5d9afa7ec9.jpeg","article_res/cover/b3d6ffec0608dcfaf18c5a69906d1490.jpeg","【AIGC Learning】Generate Prompts Using Word Graphs - Stable Diffusion Web UI Series 13","AI will become a powerful tool in education, transforming the way we learn and deliver instruction.  \n- Reid Hoffman","--",{"id":55,"publish_date":56,"is_original":4,"collection":57,"cover_url":58,"cover_url_1_1":59,"title":60,"summary":61,"author":28},413,"2023-09-08","#Neuroscience","article_res/cover/74f8302d78a23d9430f22171eae136b6.jpeg","article_res/cover/87ca08af81bb304746be5261160964c0.jpeg","Can machines be conscious?","Do we have an ethical obligation to not turn off conscious machines? Would turning them off be murder? No. I don't lose any sleep over unplugging a conscious machine.\n- Jeff Hawkins, \"A Thousand Brains\"",{"id":63,"publish_date":64,"is_original":23,"collection":65,"cover_url":66,"cover_url_1_1":67,"title":68,"summary":69,"author":28},178,"2024-09-09","#Entrepreneurship","article_res/cover/a7224f025b55d1820408085faef63079.jpeg","article_res/cover/11a9995b096cbf64465ef01b8673b154.jpeg","37signals company","This damn sense of relaxation",{"id":71,"publish_date":72,"is_original":4,"collection":73,"cover_url":74,"cover_url_1_1":75,"title":76,"summary":77,"author":78},460,"2023-05-12","#Google","article_res/cover/b970687b12faa52da976f91248c2aa7b.jpeg","article_res/cover/d1e71b52cfd2c63bc6e71f3e85ff135c.jpeg","Learn what BRC-20 and Ordinals are using Google Bard","Ordinals - a new protocol that allows users to store arbitrary data on the Bitcoin blockchain","Google Bard mainly writes",{"id":80,"publish_date":81,"is_original":23,"collection":5,"cover_url":82,"cover_url_1_1":83,"title":84,"summary":85,"author":28},309,"2024-03-26","article_res/cover/9877f95894ee88532d0e6012c23a2df3.jpeg","article_res/cover/20092164ddc109ce6ae56b1984246751.jpeg","Learning the Cancun Upgrade with lepton and perplexity","Building a quick conversation-based search demo with Lepton AI.",[87,95,103,111,119],{"id":88,"publish_date":89,"is_original":23,"collection":90,"cover_url":91,"cover_url_1_1":92,"title":93,"summary":94,"author":28},627,"2025-03-20","#AI Avatar #AI Video Generation","article_res/cover/d95481358f73924989f8c4ee9c75d1c8.jpeg","article_res/cover/b74bc0fab01f8b6a6aa87696c0c3ed8b.jpeg","DisPose: Generating Animated Videos by Driving Video with Reference Images","DisPose is a controllable human image animation method that enhances video generation.",{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},626,"2025-03-21","#Deep Dive into LLMs #LLM #RL #Andrej Karpathy #AlphaGo","article_res/cover/446553a5c8f8f2f07d97b20eaee84e56.jpeg","article_res/cover/e6c2823409c9b34624064b9acbaca6f1.jpeg","AlphaGo and the Power of Reinforcement Learning - Andrej Karpathy's Deep Dive on LLMs (Part 9)","Simply learning from humans will never surpass human capabilities.",{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},625,"2025-03-22","#Deep Dive into LLMs #LLM #RL #RLHF #Andrej Karpathy","article_res/cover/8da81d38b1e5cf558a164710fd8a5389.jpeg","article_res/cover/96f028d76c362a99a0dd56389e8f7a9b.jpeg","Reinforcement Learning from Human Feedback (RLHF) - Andrej Karpathy's Deep Dive on LLMs (Part 10)","Fine-Tuning Language Models from Human Preferences",{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},624,"2025-03-23","#Deep Dive into LLMs #LLM #Andrej Karpathy #AI Agent #MMM","article_res/cover/a5e7c3d48bb09109684d6513287c661d.jpeg","article_res/cover/d3f22b7c0ab8d82fd2da457a299e0773.jpeg","The Future of Large Language Models - Andrej Karpathy's In-Depth Explanation of LLM (Part 11)","preview of things to come",{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},623,"#Google #Voe #AI Video Generation","article_res/cover/c44062fea0f336c2b96b3928292392c2.jpeg","article_res/cover/a041041c69092ad3db191c5bf3ff981b.jpeg","Trial of Google's video generation model VOE2","Our state-of-the-art video generation model",[127,135,143],{"id":128,"publish_date":129,"is_original":23,"collection":130,"cover_url":131,"cover_url_1_1":132,"title":133,"summary":134,"author":28},300,"2024-04-16","#AI in Science #AGI","article_res/cover/6bf01e793e0f33e848572412eebdf9b0.jpeg","article_res/cover/91a5ee21dafecb914fabeb9430d46ec1.jpeg","Would Einstein lose his job - AI and Quantum Computing: A Glimpse into the Near Future","So Einstein's job is still safe.",{"id":136,"publish_date":137,"is_original":23,"collection":138,"cover_url":139,"cover_url_1_1":140,"title":141,"summary":142,"author":28},101,"2024-12-14","#Nvidia #AI 3D Generator","article_res/cover/693e07c85980c5c0c8fde3f037733f23.jpeg","article_res/cover/9ea8edff2d5d303ff3fffff3f6f9c3d9.jpeg","NVIDIA's open-source 3D project LLaMA-Mesh","LLaMA-Mesh: Unifying 3D Mesh Generation with Language Models",{"id":144,"publish_date":145,"is_original":23,"collection":146,"cover_url":147,"cover_url_1_1":148,"title":149,"summary":150,"author":28},131,"2024-11-10","#OpenAI","article_res/cover/87f8ed353ce39f31960e7cdfaf075a35.jpeg","article_res/cover/f597a63935f5cd32e484b4aadd6019e8.jpeg","ChatGPT has launched the Search function","Get fast, timely answers with links to relevant web sources.",{"big":152,"small":214},[153,181],{"title":154,"list":155},"AGENT",[156,157,165,173],{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":158,"publish_date":159,"is_original":23,"collection":160,"cover_url":161,"cover_url_1_1":162,"title":163,"summary":164,"author":28},622,"2025-03-24","#OWL #AI Agent #MAS #MCP #CUA","article_res/cover/cb50ca7f2bf4d1ed50202d7406e1c19a.jpeg","article_res/cover/4aa7aa3badfacf3cc84121334f1050dd.jpeg","OWL: Multi-agent collaboration","OWL: Optimized Workforce Learning for General Multi-Agent Assistance in Real-World Task Automation",{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},620,"2025-03-26","#LLM #Google #Gemini #AI Agent","article_res/cover/53751a6dbbe990b1eb0b63f3b062aed4.jpeg","article_res/cover/031344981f0a212ff82d1f3a64aa5756.jpeg","Gemini 2.5 Pro, claimed to be far ahead of the competition, has been released with great fanfare: comprehensively surpassing other LLMs and topping the global rankings","Gemini 2.5: Our most intelligent AI model",{"id":174,"publish_date":175,"is_original":23,"collection":176,"cover_url":177,"cover_url_1_1":178,"title":179,"summary":180,"author":28},616,"2025-03-29","#MAS #AI Agent #AI Coder #MetaGPT #MGX","article_res/cover/9dcd702ad2035902e5e77967c34a1f1e.jpeg","article_res/cover/0a97fc4a922753c8f46ff38792020df8.jpeg","MGX - An automated website-building platform composed of multiple AI Agents","Your 24/7 AI Team | Dream, Chat, Create.",{"title":182,"list":183},"OPENAI",[184,191,199,206],{"id":185,"publish_date":167,"is_original":23,"collection":186,"cover_url":187,"cover_url_1_1":188,"title":189,"summary":190,"author":28},619,"#OpenAI #AI Image Generator #4o #MMM #AR Transformer","article_res/cover/2faffc97fcecf3151552cb0fd3206d89.jpeg","article_res/cover/1133cb4948af44cee2e7fbe79efb69e5.jpeg","The native image function of GPT-4o is officially launched","Introducing 4o Image Generation",{"id":192,"publish_date":193,"is_original":4,"collection":194,"cover_url":195,"cover_url_1_1":196,"title":197,"summary":198,"author":28},434,"2023-07-15","#Anthropic #OpenAI #Google #AI Code Generator #Claude","article_res/cover/e1b6f600a2b9f262a4392684e5f2ce25.jpeg","article_res/cover/6e1772e83f78f9a351ab23d3e414adee.jpeg","Latest Updates on Google Bard /Anthropic Claude2 / ChatGPT Code Interpreter","We want our models to use their programming skills to provide more natural interfaces to the basic functions of our computers.  \n - OpenAI",{"id":200,"publish_date":201,"is_original":4,"collection":146,"cover_url":202,"cover_url_1_1":203,"title":204,"summary":205,"author":28},417,"2023-08-24","article_res/cover/bccf897d50a88b18364e35f7466387e0.jpeg","article_res/cover/2f871085c1073717c1703ae86e18056f.jpeg","The GPT-3.5 Turbo fine-tuning (fine-tuning function) has been released～","Developers can now bring their own data to customize GPT-3.5 Turbo for their use cases.",{"id":207,"publish_date":208,"is_original":4,"collection":209,"cover_url":210,"cover_url_1_1":211,"title":212,"summary":213,"author":28},407,"2023-09-22","#OpenAI #AI Image Generator","article_res/cover/c59005e903d35cfc32346e2756e2728a.jpeg","article_res/cover/ba011d265e6d84b5c8cb6fd6b757b6cc.jpeg","Dall-E 3","DALL·E 3 understands significantly more nuance and detail, allowing you to easily translate your ideas into images.",[215,221,241],{"title":10,"list":216},[217,218,219,220],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":223},"GOOGLE",[224,225,226,234],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},615,"2025-03-30","#AI Researcher #AI Science #HKU #Google #AI Agent","article_res/cover/21fadf906067714bb0db31ae13a77c15.jpeg","article_res/cover/2697999a72bd26b22e85f0e92936d3ed.jpeg","AI-Researcher: LLM-driven全自动 scientific research assistant","AI-Researcher: Fully-Automated Scientific Discovery with LLM Agents  \nOpen-Sourced Alternative to Google AI Co-Scientist",{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},463,"2023-05-09","article_res/cover/89800f207723acdb55fc53bf999ebdc9.jpeg","article_res/cover/5764f369b4accd8f83e94aa4c077a175.jpeg","The Smallville sandbox world - A town with 25 virtual residents","Believable proxies of human behavior can empower interactive apps: Immersive environment, Rehearsal space, Prototyping tool",{"title":242,"list":243},"NVIDIA",[],true,{"code":4,"msg":5,"data":246},{"id":247,"publish_date":248,"is_original":23,"collection":249,"articles_id":250,"cover_url":251,"cover_url_1_1":252,"title":253,"summary":254,"author":28,"content":255,"popular":256,"list":321,"category":377,"tag":378},149,"2024-10-17","#State of AI Report 2024","gK40T-rIqsINTqVuS25cdw","article_res/cover/3b8743ca9c648e09b59e160b16c4b196.jpeg","article_res/cover/4773993643e3fc5a8b782c4d0761636d.jpeg","\"State of AI Report 2024\" (1) - AlphaGeometry, Synthetic Data, RAG","Foundation models demonstrate their ability to break out of language as multimodal research.","\u003Cdiv class=\"rich_media_content js_underline_content\n                       autoTypeSetting24psection\n            \" id=\"js_content\">\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>The report contains a lot of details, and I mainly focus on topics that I haven't heard before or have less understanding of. Today, I will translate three sections first:\u003C/p>\u003Ch2 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);cursor: pointer;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);'>\u003Cspan style=\"cursor: pointer;font-size: 22px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">AlphaGeometry\u003C/span>\u003C/h2>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>due to insufficient reasoning capabilities and lack of training data. However, AlphaGeometry introduces a symbolic derivation engine that effectively addresses this shortfall.\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>The team from Google DeepMind and New York University (NYU) used a symbolic engine to generate millions of synthetic theorems and proofs, and trained a language model from scratch using this data.\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>, a performance close to that of gold medalists at the International Mathematical Olympiad. The best performance by other AIs was solving only 10 problems.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100007054\" data-ratio=\"0.31655844155844154\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"616\" style=\"\" src=\"./assets/17423781590640.010980189588086109.jpeg\">\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>In addition, AlphaGeometry demonstrated good generalization ability. For example, it discovered that a detail in a 2004 International Mathematical Olympiad question was not necessary for the proof.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100007053\" data-ratio=\"0.6590909090909091\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"616\" style=\"\" src=\"./assets/17423781590590.06128541004457366.jpeg\">\u003C/p>\u003Ch2 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);cursor: pointer;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);'>\u003Cspan style=\"cursor: pointer;font-size: 22px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">Synthetic Data\u003C/span>\u003C/h2>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>When I previously shared the HAI report, I mentioned the discussion on synthetic data:\u003C/p>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);cursor: pointer;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);'>\u003Cspan style=\"cursor: pointer;font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">Supporters\u003C/span>\u003C/h3>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>Nowadays, synthetic data is being increasingly adopted.\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>In last year's report, there were differing views on synthetic data: some found it very useful, while others worried that it could lead to model failure due to the accumulation of errors. Now, it seems that perspectives are gradually shifting:\u003C/p>\u003Cul style='margin-top: 8px;margin-bottom: 8px;cursor: pointer;padding-left: 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);' class=\"list-paddingleft-1\">\u003Cli style=\"cursor: pointer;\">\u003Csection style=\"cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">Synthetic data is not only the main source of training data for the Phi series models, but Anthropic also used synthetic data when training Claude 3, helping to make up for possible missing scenarios in the training data.\u003C/section>\u003C/li>\u003Cli style=\"cursor: pointer;\">\u003Csection style=\"cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">Hugging Face generated over 30 million files and 25 billion synthetic texts (including textbooks, blog posts, and stories) using Mixtral-8x7B Instruct to recreate the training dataset for Phi-1.5, which they call\u003C/section>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100007057\" data-ratio=\"0.42962962962962964\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"./assets/17423781590580.7959688977551513.png\">\u003C/p>\u003C/li>\u003Cli style=\"cursor: pointer;\">\u003Csection style=\"cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">a series of models specifically designed for generating synthetic data, with permissive usage licenses. Meta's Llama can also be used to generate synthetic data.\u003C/section>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100007056\" data-ratio=\"0.562037037037037\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"\" src=\"./assets/17423781590640.5504166225955487.png\">\u003C/p>\u003C/li>\u003Cli style=\"cursor: pointer;\">\u003Csection style=\"cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">Furthermore, high-quality instruction data can be directly extracted from aligned LLMs to generate synthetic data, optimized using technologies like Magpie. Models fine-tuned in this way sometimes rival the performance of Llama-3-8B-Instruct.\u003C/section>\u003C/li>\u003C/ul>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);cursor: pointer;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);'>\u003Cspan style=\"cursor: pointer;font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">Opponents\u003C/span>\u003C/h3>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>Although model developers are moving quickly, researchers are evaluating whether there is a tipping point where synthetic data causes model collapse and whether any mitigation measures are effective.\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>that model collapse occurs across a variety of AI architectures, including fine-tuned language models. This challenges the view that pre-training or periodic exposure to small amounts of real data can prevent model degradation (measured by perplexity scores).\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100007058\" data-ratio=\"1.0777777777777777\" data-s=\"300,640\" data-type=\"webp\" data-w=\"1080\" style=\"\" src=\"./assets/17423781590780.8056893275035202.jpeg\">\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>\", because continuous access to diverse, human-generated data will become increasingly critical to maintaining model quality. However, these results primarily focus on a scenario where real data is gradually replaced by synthetic data across generations. In practice, real and synthetic data are typically accumulated simultaneously.\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>Other studies indicate that as long as the proportion of synthetic data is not too high, model collapse can generally be avoided.\u003C/p>\u003Ch2 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);cursor: pointer;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);'>\u003Cspan style=\"cursor: pointer;font-size: 22px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">RAG\u003C/span>\u003C/h2>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>While retrieval and embeddings are not new concepts, interest in them has grown significantly with the rise of Retrieval-Augmented Generation (RAG), driving improvements in the quality of embedding models.\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>Following the conventional path of success for large language models (LLMs), scale brings significant performance gains (for example, GritLM has approximately 47 billion parameters, compared to previous embedding models which typically only had 110 million parameters).\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>is a visual-language embedding model that improves retrieval not only by using text embeddings but also by leveraging the visual structure of documents.\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>is at the top.\u003C/p>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);cursor: pointer;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);'>\u003Cspan style=\"cursor: pointer;font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">Context is a key driver of performance.\u003C/span>\u003C/h3>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>Traditional RAG solutions often generate text fragments every 256 tokens using a sliding window (with 128 tokens overlapping the previous section). While this method improves retrieval efficiency, it significantly reduces accuracy.\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>(from 5.7% to 3.7%), and can be scaled via Anthropic's prompt caching technology.\u003C/p>\u003Cp style=\"text-align: center;\">\u003Cimg class=\"rich_pages wxw-img js_insertlocalimg\" data-croporisrc=\"https://mmbiz.qpic.cn/sz_mmbiz_jpg/YdtkbCEBMDGY2UlEtyJeS7eC2Nt7jvhfpicKzp77ZNCw47DOGW2KD6ib8opqS4FSmq4VQ3h5iaKFgPKxlXwIhcV5g/0?wx_fmt=jpeg&amp;from=appmsg\" data-cropx1=\"0\" data-cropx2=\"1080\" data-cropy1=\"478.3391003460207\" data-cropy2=\"1108.0276816608996\" data-imgfileid=\"100007055\" data-ratio=\"0.5824074074074074\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"1080\" style=\"width: 578px;height: 337px;\" src=\"./assets/17423781600640.339035103445511.jpeg\">\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>'s impact (measured in recall).\u003C/p>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);cursor: pointer;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);'>\u003Cspan style=\"cursor: pointer;font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">The evaluation issue of RAG remains unresolved.\u003C/span>\u003C/h3>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>Many commonly used RAG benchmarks are actually repurposed retrieval or question-answering datasets, which cannot effectively evaluate the accuracy of citations, the importance of each piece of text to the overall answer, or the impact of handling conflicting information.\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>It provides a large-scale set of complex multidimensional problems, which come from real user queries and require in-depth research and analysis to answer.\u003C/p>\u003Cp style=\"display: none;\">\u003Cmp-style-type data-value=\"3\">\u003C/mp-style-type>\u003C/p>\u003C/div>",[257,266,274,282,290,297,305,313],{"id":258,"title_md5":259,"publish_date":260,"author_md5":261,"is_original":23,"collection":262,"summary_md5":263,"cover_url":264,"cover_url_1_1":265},63,"af3f1abfac3654a7cbd3ddfc4d0c8793","2025-01-22","bc27fa490c4d0d525bac812fc0793534","#Microsoft #Research #Nature #AI Science #chemistry","5a5f01205ff3aaeadd9b697718d055c6","article_res/cover/d2f6a78ac00110002e05dbd868952d51.jpeg","article_res/cover/bc00a61c35d090b9734d647c8c1830ba.jpeg",{"id":267,"title_md5":268,"publish_date":269,"author_md5":270,"is_original":4,"collection":5,"summary_md5":271,"cover_url":272,"cover_url_1_1":273},583,"bd44bfb6466c551164351c3e2bc01c90","2022-04-03","8b3607d0f4181a3cb6ffdccf7185f09b","ea2cc2f51cc0f3baab993da71452324a","article_res/cover/979bfe15ac7e07eaa4832113dc3c26ca.jpeg","article_res/cover/447509ac7057109ac36e7bd47fad42c9.jpeg",{"id":275,"title_md5":276,"publish_date":277,"author_md5":261,"is_original":23,"collection":278,"summary_md5":279,"cover_url":280,"cover_url_1_1":281},341,"b55735ef963870d9a24257601ab5d7be","2024-01-20","#AI Avatar #Tencent","1ce1dc3993f5e729e0cc63d4bbbbe60b","article_res/cover/a3050658715f87fbc909668bbb82cd85.jpeg","article_res/cover/f2b3ca4f1b3da75fc3e72e589183f822.jpeg",{"id":283,"title_md5":284,"publish_date":285,"author_md5":261,"is_original":4,"collection":286,"summary_md5":287,"cover_url":288,"cover_url_1_1":289},275,"15751a77f7da4bcba33eff3ff5a0811a","2024-05-14","#LLM","cbfe16f9ef04dc79ba98a5cb2c23959b","article_res/cover/3ebae852d0132307cb13b9b68ead6b83.jpeg","article_res/cover/1675fc145cc31c6bdb0f404a3e7ae098.jpeg",{"id":291,"title_md5":292,"publish_date":293,"author_md5":261,"is_original":23,"collection":73,"summary_md5":294,"cover_url":295,"cover_url_1_1":296},323,"fa86b0654bd2eb53bab871a53281cd5c","2024-02-24","1169500f98fe518591fdcfcc92c2c85c","article_res/cover/f0f7620a34dd1d0a016331344d986b68.jpeg","article_res/cover/7184f2f31d6d75135858671cf34f41f5.jpeg",{"id":298,"title_md5":299,"publish_date":300,"author_md5":261,"is_original":23,"collection":301,"summary_md5":302,"cover_url":303,"cover_url_1_1":304},242,"b32ac197ef4528cfc9cc7703113bcff1","2024-06-18","#AI Avatar #Lip Sync","960f63511c33c5ef68155ad69afdfa03","article_res/cover/962521d82eac2b5cf90b12ec083495d6.jpeg","article_res/cover/55537cb0bfe54667c4d8ccffaa064c1c.jpeg",{"id":306,"title_md5":307,"publish_date":308,"author_md5":261,"is_original":23,"collection":309,"summary_md5":310,"cover_url":311,"cover_url_1_1":312},399,"ea200fc67f4264d2920caf55136495c6","2023-10-08","#AI Image Generator #Tencent","e66cb4e964b30d415141b35935cf81e7","article_res/cover/d8df91af5daff4596d41f539900b2f11.jpeg","article_res/cover/4e52581af1766f866fc106997dadc216.jpeg",{"id":314,"title_md5":315,"publish_date":316,"author_md5":261,"is_original":23,"collection":317,"summary_md5":318,"cover_url":319,"cover_url_1_1":320},298,"d9f63db3e5e9cc3ac7430b67efdb226b","2024-04-20","#AI Index Report 2024","f91a4598abb2715106e15031de67547f","article_res/cover/728127e96e441e9be92dd905db06e866.jpeg","article_res/cover/e8a1f6d0a6cefc0d9c8c962a0e94fd3b.jpeg",{"related":322,"small":362},[323,330,338,346,354],{"id":324,"publish_date":325,"is_original":4,"collection":5,"cover_url":326,"cover_url_1_1":327,"title":328,"summary":329,"author":28},443,"2023-06-21","article_res/cover/804cde3cc06176bd7d4da0ec0eb8d10f.jpeg","article_res/cover/05f9b4c8b4236b3195a27e386be86f21.jpeg","DALL·E: outpainting experience trial","Extend creativity and tell a bigger story with DALL·E images of any size",{"id":331,"publish_date":332,"is_original":4,"collection":333,"cover_url":334,"cover_url_1_1":335,"title":336,"summary":337,"author":53},497,"2023-02-03","#Entrepreneurship #Psychology","article_res/cover/fc13cb4a40d5006b6ab96baf922a8cf0.jpeg","article_res/cover/d8d80c64874011e73ef9502a3f9f99f6.jpeg","Will tomorrow be better?","As Dao improves a foot, magic improves a chi (higher).",{"id":339,"publish_date":340,"is_original":4,"collection":341,"cover_url":342,"cover_url_1_1":343,"title":344,"summary":345,"author":28},129,"2024-11-12","#OpenAI #AI Code Generator #LLM #o1","article_res/cover/6654db2a98aadeea109fc3364da389c7.jpeg","article_res/cover/b1e5e6d6a62fb9b205ac09b16c6a25da.jpeg","OnBoard! Podcast Notes - EP 62: OpenAI o1 and the New Paradigm of LLM + Reinforcement Learning (1)","EP 62. A deep interpretation of Google Deepmind and LLM researchers on OpenAI o1 and the new paradigm of LLM + reinforcement learning",{"id":347,"publish_date":348,"is_original":4,"collection":5,"cover_url":349,"cover_url_1_1":350,"title":351,"summary":352,"author":353},605,"2022-03-12","article_res/cover/b8b7c3b1377d8123bafbb8fd171f029c.jpeg","article_res/cover/2994594e2c4eef4b4732bfe9d526ffe0.jpeg","How to read financial statements - P&L Income Statement","For business owners, there are many important documents to learn to read. One of the most important is P&L.","Course notes",{"id":355,"publish_date":356,"is_original":23,"collection":357,"cover_url":358,"cover_url_1_1":359,"title":360,"summary":361,"author":28},139,"2024-10-30","#State of AI Report 2024 #AI Audio Generator","article_res/cover/63c1c7274cbf8d6b0036af2bb36d4b2a.jpeg","article_res/cover/83f6acdc1f075ef5583203978dd72066.jpeg","\"State of AI Report 2024\" (6) - Voice-related","The next (uncanny) frontier: speech-to-speech?",[363,369,375],{"title":10,"list":364},[365,366,367,368],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":370},[371,372,373,374],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},{"title":242,"list":376},[],[8,9,10],[8,12,13,14,9,10,15,16,17,18],["Reactive",245],1754646413991]