[{"data":1,"prerenderedAt":374},["ShallowReactive",2],{"$fgukOamtKU1RtUiMFsqdObttmqPPQz0uc7bl_gj_LyX0":3,"$fugt22g8fjTn6r7ivAq56k8cwElIwVm2O-VcpyyK5jgs":245,"article-218":373},{"code":4,"msg":5,"data":6},0,"",{"category":7,"tag":11,"popular":19,"latest":86,"banner":126,"list":151,"cache":244},[8,9,10],"Agent","OpenAI","LLM",[8,12,13,14,9,10,15,16,17,18],"Google","Nvidia","Claude","DeepSeek","OCR","Chat","Generator",[20,29,37,45,54,62,70,79],{"id":21,"publish_date":22,"is_original":23,"collection":5,"cover_url":24,"cover_url_1_1":25,"title":26,"summary":27,"author":28},411,"2023-09-10",1,"article_res/cover/451ef50c225a8dc61c4336506794d13b.jpeg","article_res/cover/3ba9dc7a72f87d40b20fc2d225289ee3.jpeg","Idealism","Reality is created by the mind, we can change our reality by changing our mind. - Plato","Renee's Entrepreneurial Journey",{"id":30,"publish_date":31,"is_original":23,"collection":32,"cover_url":33,"cover_url_1_1":34,"title":35,"summary":36,"author":28},108,"2024-12-07","#LLM #AGI #AI Agent","article_res/cover/0039044422e4ec9f61c18e8ee1693bb0.jpeg","article_res/cover/4220971b108a91d21407d87bb02fbaa6.jpeg","Freysa.ai: The World's First Adversarial AI Agent Game","说服 Freysa 把钱包里的钱都拿出来",{"id":38,"publish_date":39,"is_original":23,"collection":40,"cover_url":41,"cover_url_1_1":42,"title":43,"summary":44,"author":28},12,"2025-03-09","#Oxford #Reasoning #LLM #Tool Use","article_res/cover/d448e9b3617a0b5302e1bd10c438bca9.jpeg","article_res/cover/864a468f9cc4c9317efadb3811909888.jpeg","Agentic Reasoning Framework - Significantly enhance the reasoning ability of LLMs through the integration of external tools using agents","Agentic Reasoning: Reasoning LLMs with Tools for Deep Research",{"id":46,"publish_date":47,"is_original":4,"collection":48,"cover_url":49,"cover_url_1_1":50,"title":51,"summary":52,"author":53},480,"2023-04-14","#Stable Diffusion","article_res/cover/0bdbe7cb1de4a78e54536e5d9afa7ec9.jpeg","article_res/cover/b3d6ffec0608dcfaf18c5a69906d1490.jpeg","【AIGC Learning】Generate Prompts Using Word Graphs - Stable Diffusion Web UI Series 13","AI will become a powerful tool in education, transforming the way we learn and deliver instruction.  \n- Reid Hoffman","--",{"id":55,"publish_date":56,"is_original":4,"collection":57,"cover_url":58,"cover_url_1_1":59,"title":60,"summary":61,"author":28},413,"2023-09-08","#Neuroscience","article_res/cover/74f8302d78a23d9430f22171eae136b6.jpeg","article_res/cover/87ca08af81bb304746be5261160964c0.jpeg","Can machines be conscious?","Do we have an ethical obligation to not turn off conscious machines? Would turning them off be murder? No. I don't lose any sleep over unplugging a conscious machine.\n- Jeff Hawkins, \"A Thousand Brains\"",{"id":63,"publish_date":64,"is_original":23,"collection":65,"cover_url":66,"cover_url_1_1":67,"title":68,"summary":69,"author":28},178,"2024-09-09","#Entrepreneurship","article_res/cover/a7224f025b55d1820408085faef63079.jpeg","article_res/cover/11a9995b096cbf64465ef01b8673b154.jpeg","37signals company","This damn sense of relaxation",{"id":71,"publish_date":72,"is_original":4,"collection":73,"cover_url":74,"cover_url_1_1":75,"title":76,"summary":77,"author":78},460,"2023-05-12","#Google","article_res/cover/b970687b12faa52da976f91248c2aa7b.jpeg","article_res/cover/d1e71b52cfd2c63bc6e71f3e85ff135c.jpeg","Learn what BRC-20 and Ordinals are using Google Bard","Ordinals - a new protocol that allows users to store arbitrary data on the Bitcoin blockchain","Google Bard mainly writes",{"id":80,"publish_date":81,"is_original":23,"collection":5,"cover_url":82,"cover_url_1_1":83,"title":84,"summary":85,"author":28},309,"2024-03-26","article_res/cover/9877f95894ee88532d0e6012c23a2df3.jpeg","article_res/cover/20092164ddc109ce6ae56b1984246751.jpeg","Learning the Cancun Upgrade with lepton and perplexity","Building a quick conversation-based search demo with Lepton AI.",[87,95,103,111,119],{"id":88,"publish_date":89,"is_original":23,"collection":90,"cover_url":91,"cover_url_1_1":92,"title":93,"summary":94,"author":28},627,"2025-03-20","#AI Avatar #AI Video Generation","article_res/cover/d95481358f73924989f8c4ee9c75d1c8.jpeg","article_res/cover/b74bc0fab01f8b6a6aa87696c0c3ed8b.jpeg","DisPose: Generating Animated Videos by Driving Video with Reference Images","DisPose is a controllable human image animation method that enhances video generation.",{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},626,"2025-03-21","#Deep Dive into LLMs #LLM #RL #Andrej Karpathy #AlphaGo","article_res/cover/446553a5c8f8f2f07d97b20eaee84e56.jpeg","article_res/cover/e6c2823409c9b34624064b9acbaca6f1.jpeg","AlphaGo and the Power of Reinforcement Learning - Andrej Karpathy's Deep Dive on LLMs (Part 9)","Simply learning from humans will never surpass human capabilities.",{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},625,"2025-03-22","#Deep Dive into LLMs #LLM #RL #RLHF #Andrej Karpathy","article_res/cover/8da81d38b1e5cf558a164710fd8a5389.jpeg","article_res/cover/96f028d76c362a99a0dd56389e8f7a9b.jpeg","Reinforcement Learning from Human Feedback (RLHF) - Andrej Karpathy's Deep Dive on LLMs (Part 10)","Fine-Tuning Language Models from Human Preferences",{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},624,"2025-03-23","#Deep Dive into LLMs #LLM #Andrej Karpathy #AI Agent #MMM","article_res/cover/a5e7c3d48bb09109684d6513287c661d.jpeg","article_res/cover/d3f22b7c0ab8d82fd2da457a299e0773.jpeg","The Future of Large Language Models - Andrej Karpathy's In-Depth Explanation of LLM (Part 11)","preview of things to come",{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},623,"#Google #Voe #AI Video Generation","article_res/cover/c44062fea0f336c2b96b3928292392c2.jpeg","article_res/cover/a041041c69092ad3db191c5bf3ff981b.jpeg","Trial of Google's video generation model VOE2","Our state-of-the-art video generation model",[127,135,143],{"id":128,"publish_date":129,"is_original":23,"collection":130,"cover_url":131,"cover_url_1_1":132,"title":133,"summary":134,"author":28},300,"2024-04-16","#AI in Science #AGI","article_res/cover/6bf01e793e0f33e848572412eebdf9b0.jpeg","article_res/cover/91a5ee21dafecb914fabeb9430d46ec1.jpeg","Would Einstein lose his job - AI and Quantum Computing: A Glimpse into the Near Future","So Einstein's job is still safe.",{"id":136,"publish_date":137,"is_original":23,"collection":138,"cover_url":139,"cover_url_1_1":140,"title":141,"summary":142,"author":28},101,"2024-12-14","#Nvidia #AI 3D Generator","article_res/cover/693e07c85980c5c0c8fde3f037733f23.jpeg","article_res/cover/9ea8edff2d5d303ff3fffff3f6f9c3d9.jpeg","NVIDIA's open-source 3D project LLaMA-Mesh","LLaMA-Mesh: Unifying 3D Mesh Generation with Language Models",{"id":144,"publish_date":145,"is_original":23,"collection":146,"cover_url":147,"cover_url_1_1":148,"title":149,"summary":150,"author":28},131,"2024-11-10","#OpenAI","article_res/cover/87f8ed353ce39f31960e7cdfaf075a35.jpeg","article_res/cover/f597a63935f5cd32e484b4aadd6019e8.jpeg","ChatGPT has launched the Search function","Get fast, timely answers with links to relevant web sources.",{"big":152,"small":214},[153,181],{"title":154,"list":155},"AGENT",[156,157,165,173],{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":158,"publish_date":159,"is_original":23,"collection":160,"cover_url":161,"cover_url_1_1":162,"title":163,"summary":164,"author":28},622,"2025-03-24","#OWL #AI Agent #MAS #MCP #CUA","article_res/cover/cb50ca7f2bf4d1ed50202d7406e1c19a.jpeg","article_res/cover/4aa7aa3badfacf3cc84121334f1050dd.jpeg","OWL: Multi-agent collaboration","OWL: Optimized Workforce Learning for General Multi-Agent Assistance in Real-World Task Automation",{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},620,"2025-03-26","#LLM #Google #Gemini #AI Agent","article_res/cover/53751a6dbbe990b1eb0b63f3b062aed4.jpeg","article_res/cover/031344981f0a212ff82d1f3a64aa5756.jpeg","Gemini 2.5 Pro, claimed to be far ahead of the competition, has been released with great fanfare: comprehensively surpassing other LLMs and topping the global rankings","Gemini 2.5: Our most intelligent AI model",{"id":174,"publish_date":175,"is_original":23,"collection":176,"cover_url":177,"cover_url_1_1":178,"title":179,"summary":180,"author":28},616,"2025-03-29","#MAS #AI Agent #AI Coder #MetaGPT #MGX","article_res/cover/9dcd702ad2035902e5e77967c34a1f1e.jpeg","article_res/cover/0a97fc4a922753c8f46ff38792020df8.jpeg","MGX - An automated website-building platform composed of multiple AI Agents","Your 24/7 AI Team | Dream, Chat, Create.",{"title":182,"list":183},"OPENAI",[184,191,199,206],{"id":185,"publish_date":167,"is_original":23,"collection":186,"cover_url":187,"cover_url_1_1":188,"title":189,"summary":190,"author":28},619,"#OpenAI #AI Image Generator #4o #MMM #AR Transformer","article_res/cover/2faffc97fcecf3151552cb0fd3206d89.jpeg","article_res/cover/1133cb4948af44cee2e7fbe79efb69e5.jpeg","The native image function of GPT-4o is officially launched","Introducing 4o Image Generation",{"id":192,"publish_date":193,"is_original":4,"collection":194,"cover_url":195,"cover_url_1_1":196,"title":197,"summary":198,"author":28},434,"2023-07-15","#Anthropic #OpenAI #Google #AI Code Generator #Claude","article_res/cover/e1b6f600a2b9f262a4392684e5f2ce25.jpeg","article_res/cover/6e1772e83f78f9a351ab23d3e414adee.jpeg","Latest Updates on Google Bard /Anthropic Claude2 / ChatGPT Code Interpreter","We want our models to use their programming skills to provide more natural interfaces to the basic functions of our computers.  \n - OpenAI",{"id":200,"publish_date":201,"is_original":4,"collection":146,"cover_url":202,"cover_url_1_1":203,"title":204,"summary":205,"author":28},417,"2023-08-24","article_res/cover/bccf897d50a88b18364e35f7466387e0.jpeg","article_res/cover/2f871085c1073717c1703ae86e18056f.jpeg","The GPT-3.5 Turbo fine-tuning (fine-tuning function) has been released～","Developers can now bring their own data to customize GPT-3.5 Turbo for their use cases.",{"id":207,"publish_date":208,"is_original":4,"collection":209,"cover_url":210,"cover_url_1_1":211,"title":212,"summary":213,"author":28},407,"2023-09-22","#OpenAI #AI Image Generator","article_res/cover/c59005e903d35cfc32346e2756e2728a.jpeg","article_res/cover/ba011d265e6d84b5c8cb6fd6b757b6cc.jpeg","Dall-E 3","DALL·E 3 understands significantly more nuance and detail, allowing you to easily translate your ideas into images.",[215,221,241],{"title":10,"list":216},[217,218,219,220],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":223},"GOOGLE",[224,225,226,234],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},615,"2025-03-30","#AI Researcher #AI Science #HKU #Google #AI Agent","article_res/cover/21fadf906067714bb0db31ae13a77c15.jpeg","article_res/cover/2697999a72bd26b22e85f0e92936d3ed.jpeg","AI-Researcher: LLM-driven全自动 scientific research assistant","AI-Researcher: Fully-Automated Scientific Discovery with LLM Agents  \nOpen-Sourced Alternative to Google AI Co-Scientist",{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},463,"2023-05-09","article_res/cover/89800f207723acdb55fc53bf999ebdc9.jpeg","article_res/cover/5764f369b4accd8f83e94aa4c077a175.jpeg","The Smallville sandbox world - A town with 25 virtual residents","Believable proxies of human behavior can empower interactive apps: Immersive environment, Rehearsal space, Prototyping tool",{"title":242,"list":243},"NVIDIA",[],true,{"code":4,"msg":5,"data":246},{"id":247,"publish_date":248,"is_original":23,"collection":249,"articles_id":250,"cover_url":251,"cover_url_1_1":252,"title":253,"summary":254,"author":28,"content":255,"popular":256,"list":322,"category":371,"tag":372},218,"2024-07-16","#Google #AI Image Generator #AI Image Editor","x__RehhmhhDl8kmMZQZ-sQ","article_res/cover/87672e26d288ee3b0678dcbf32f1e4e2.jpeg","article_res/cover/4189d656ac3e42fd0926cac55198d070.jpeg","Google's Magic Insert achieves style-aware and realistic insertion effects by dragging into the target image","Magic Insert: Style-Aware Drag-and-Drop","\u003Cdiv class=\"rich_media_content js_underline_content\n                       autoTypeSetting24psection\n            \" id=\"js_content\">\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>Yesterday, I shared a Google research study, and today I'll share another one. This research currently only has a paper, no open-source code yet. The study is named \"Magic Insert: Style-Aware Drag-and-Drop\".\u003C/p>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>Through Magic Insert, the main subject of an image can be dragged and dropped into another target image with a completely different style, achieving a style-aware and realistic insertion effect.\u003C/p>\u003Ch2 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);cursor: pointer;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);'>\u003Cspan style=\"cursor: pointer;font-size: 22px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">Effect\u003C/span>\u003C/h2>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>The effectiveness and diversity in style-aware insertion are demonstrated. These examples cover various artistic styles of subjects and target backgrounds, ranging from realistic scenes to cartoons and paintings.\u003C/p>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);cursor: pointer;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);'>\u003Cspan style=\"cursor: pointer;font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">LLM-guided pose adjustment\u003C/span>\u003C/h3>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>The example shows Magic Insert pose editing guided by LLM, where the LLM suggests plausible poses and environmental interactions for image regions, and Magic Insert generates and inserts a stylized subject with the corresponding pose into the image.\u003C/p>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);cursor: pointer;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);'>\u003Cspan style=\"cursor: pointer;font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">Bootstrap domain adaptation results\u003C/span>\u003C/h3>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>Using a pre-trained subject insertion module without Bootstrap domain adaptation leads to suboptimal results with failure modes such as missing shadows and reflections, or adding distortions and artifacts.\u003C/p>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);cursor: pointer;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);'>\u003Cspan style=\"cursor: pointer;font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">Style-aware personalization with attribute modification\u003C/span>\u003C/h3>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>It allows modification of key attributes of the subject, such as those shown in the figure, while consistently applying the target style during the generation process. This enables redesigning characters or adding accessories, greatly increasing the flexibility for creative uses. It is worth noting that this capability disappears when using ControlNet.\u003C/p>\u003Ch3 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);cursor: pointer;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);'>\u003Cspan style=\"cursor: pointer;font-size: 20px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">Editability / Fidelity Trade-off\u003C/span>\u003C/h3>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>The generation results of Space Navy with different fine-tuning iterations (as shown in the figure) demonstrate the editability / fidelity trade-off phenomenon, adopting a \"green ships\" style and adding a \"sitting on the ground\" text prompt. When the style-aware personalized model is fine-tuned for longer on the subject, the resulting subject has stronger fidelity but reduced flexibility in editing poses or other semantic attributes. This may also affect the editability of the style.\u003C/p>\u003Ch2 style='margin-top: 30px;margin-bottom: 15px;color: rgba(0, 0, 0, 0.85);cursor: pointer;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);'>\u003Cspan style=\"cursor: pointer;font-size: 22px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">Method\u003C/span>\u003C/h2>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>To generate subjects that respect both the target image's style and retain the essence and identity of the subject, Magic Insert takes the following steps:\u003C/p>\u003Col style='margin-top: 8px;margin-bottom: 8px;cursor: pointer;padding-left: 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);' class=\"list-paddingleft-1\">\u003Cli style=\"cursor: pointer;\">\u003Csection style=\"cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">：Personalizing diffusion models in the weight and embedding space by training LoRA increments on top of a pre-trained diffusion model, while simultaneously training two textual token embeddings using the diffusion denoising loss.\u003C/section>\u003C/li>\u003Cli style=\"cursor: pointer;\">\u003Csection style=\"cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">：Using personalized diffusion models to generate style-aware subjects by embedding the style of a target image and injecting adapter styles into selective up-sampling layers of the model during the denoising process.\u003C/section>\u003C/li>\u003C/ol>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>To insert style-aware personalized generated subjects, we perform the following steps:\u003C/p>\u003Col style='margin-top: 8px;margin-bottom: 8px;cursor: pointer;padding-left: 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);' class=\"list-paddingleft-1\">\u003Cli style=\"cursor: pointer;\">\u003Csection style=\"cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">：Paste the segmented main version onto the target image.\u003C/section>\u003C/li>\u003Cli style=\"cursor: pointer;\">\u003Csection style=\"cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">：Run our main subject insertion model on the shadow-removed image, which creates contextual cues and realistically embeds the subject into the image, including shadows and reflections.\u003C/section>\u003C/li>\u003C/ol>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>, which is to adapt the model's effective domain by using a subset of the model's own outputs. The specific steps are as follows:\u003C/p>\u003Col style='margin-top: 8px;margin-bottom: 8px;cursor: pointer;padding-left: 25px;color: rgb(0, 0, 0);font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;font-size: 16px;letter-spacing: normal;text-align: left;text-wrap: wrap;background-color: rgb(255, 255, 255);' class=\"list-paddingleft-1\">\u003Cli style=\"cursor: pointer;\">\u003Csection style=\"cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">Use the main body removal/insertion model to first remove the main body and shadows from the data set of the target domain.\u003C/section>\u003C/li>\u003Cli style=\"cursor: pointer;\">\u003Csection style=\"cursor: pointer;margin-top: 5px;margin-bottom: 5px;color: rgb(1, 1, 1);line-height: 1.8em;letter-spacing: 0em;\">Filter out defective outputs and retrain the main body removal/insertion model using the filtered image set.\u003C/section>\u003C/li>\u003C/ol>\u003Cp style='margin-bottom: 0px;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;padding-top: 8px;padding-bottom: 8px;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;text-wrap: wrap;background-color: rgb(255, 255, 255);'>We observe that the initial distribution (blue) changes after training (purple), and images initially misprocessed (red samples) are subsequently correctly processed (green samples). During bootstrap domain adaptation, we only train on the initially correct samples (green).\u003C/p>\u003Cp>\u003Cbr>\u003C/p>\u003Ch2 style='margin-top: 30px;margin-bottom: 15px;text-wrap: wrap;color: rgba(0, 0, 0, 0.85);cursor: pointer;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;letter-spacing: normal;text-align: left;background-color: rgb(255, 255, 255);'>\u003Cspan style=\"cursor: pointer;font-size: 22px;color: rgb(0, 0, 0);line-height: 1.5em;letter-spacing: 0em;font-weight: bold;display: block;\">Compare\u003C/span>\u003C/h2>\u003Cp style='margin-bottom: 0px;padding-top: 8px;padding-bottom: 8px;text-wrap: wrap;cursor: pointer;color: rgb(0, 0, 0);font-size: 16px;line-height: 1.8em;letter-spacing: normal;text-align: left;font-family: Optima, \"Microsoft YaHei\", PingFangSC-regular, serif;background-color: rgb(255, 255, 255);'>A comparison of style-aware personalization methods with top baseline methods, StyleAlign + ControlNet and InstantStyle + ControlNet. It can be seen that the baseline methods are able to generate decent outputs, but still lag behind Magic Insert's style-aware personalization method in overall quality. In particular, the output of InstantStyle + ControlNet often appears slightly blurry and fails to capture the contrast of the main subject features well.\u003C/p>\u003Cp style=\"display: none;\">\u003Cmp-style-type data-value=\"3\">\u003C/mp-style-type>\u003C/p>\u003C/div>",[257,266,274,281,289,297,306,314],{"id":258,"title_md5":259,"publish_date":260,"author_md5":261,"is_original":23,"collection":262,"summary_md5":263,"cover_url":264,"cover_url_1_1":265},183,"3f741220babab7d253633d147110226c","2024-09-03","bc27fa490c4d0d525bac812fc0793534","#Meta","02d3cd60c5f2788d07872bf9c1e2a639","article_res/cover/1430550bbf1444488b94d767f1dd97c3.jpeg","article_res/cover/e06f0dabb08939da492aaaf263e5f07f.jpeg",{"id":267,"title_md5":268,"publish_date":269,"author_md5":261,"is_original":23,"collection":270,"summary_md5":271,"cover_url":272,"cover_url_1_1":273},207,"7a39aaf30ce3a56e97eff69e83d8773e","2024-08-01","#Buffett","f2fe71f1fe3b22d3e284a0c3b475f7fd","article_res/cover/1aa0c688a3c0c88bdbe206fef021d450.jpeg","article_res/cover/22e9be4de3802e882c2d274130d477d5.jpeg",{"id":275,"title_md5":276,"publish_date":277,"author_md5":261,"is_original":23,"collection":65,"summary_md5":278,"cover_url":279,"cover_url_1_1":280},180,"20a07cafdf9c3de70a291a867667df82","2024-09-06","9cc6b01a69f2de18e61d909603bd1cb4","article_res/cover/ce95c7068a7e2fba26c9090ada7750c4.jpeg","article_res/cover/a61e390482a0d4c927e46660137ab17d.jpeg",{"id":282,"title_md5":283,"publish_date":284,"author_md5":285,"is_original":4,"collection":5,"summary_md5":286,"cover_url":287,"cover_url_1_1":288},604,"7a40c4bb16062ca4a7b649391d472b74","2022-03-13","311a46cfdaa3afda544e9285644f70d7","b91d2995e009e330a823c2c6e37ecfc9","article_res/cover/b1e7e12d55932a445e91d45f896c486a.jpeg","article_res/cover/0c07557158250cca3a12a1ea1b9d9a48.jpeg",{"id":290,"title_md5":291,"publish_date":292,"author_md5":261,"is_original":23,"collection":293,"summary_md5":294,"cover_url":295,"cover_url_1_1":296},305,"3f0dd1f5472bcd0ad3e9224afeaf93a1","2024-03-30","#AI Audio Generator #AI Avatar #Tencent","ca68545fdf728d1e05d8b25374057642","article_res/cover/238d4919401a20eefc9fb59e438ccac9.jpeg","article_res/cover/b15d2c460f8f4b558cbbcc9d5c5c37c9.jpeg",{"id":298,"title_md5":299,"publish_date":300,"author_md5":301,"is_original":4,"collection":302,"summary_md5":303,"cover_url":304,"cover_url_1_1":305},512,"a7e99f6e3921bf69fc994dae723fc0bc","2022-06-25","8936f53b23e9dfe2965d9e2ec23a4779","#Philosophy","b118be0853ceab732ba10ecccf078ef7","article_res/cover/f4500b06d6383effc25eb83a7ad6b035.jpeg","article_res/cover/ecf655f8012c89487d9fd130ac13c9f0.jpeg",{"id":307,"title_md5":308,"publish_date":309,"author_md5":261,"is_original":23,"collection":310,"summary_md5":311,"cover_url":312,"cover_url_1_1":313},65,"355241d059bad6536d0bb9bea822f270","2025-01-20","#AI Image Generator #AI Video Generator #Krea #AI 3D Generator","29bfdafcda8f22bb9177d697aa2e5446","article_res/cover/aa9dbc8db793695f3f24fcf41afc1fee.jpeg","article_res/cover/512cc57558a4f8a8eff6a2acbdb816a3.jpeg",{"id":315,"title_md5":316,"publish_date":317,"author_md5":261,"is_original":23,"collection":318,"summary_md5":319,"cover_url":320,"cover_url_1_1":321},56,"d4e3783328eabf0dc6a4f70dbd4ea8c5","2025-01-28","#DeepSeek #MMM #AI Image Generator #Multimodal Understanding","004cf9fb1336b7e5461ddc72394e80af","article_res/cover/3b7efa559e0888141c890e933d504510.jpeg","article_res/cover/653880885ccb19b1ba9496f2aaee4a38.jpeg",{"related":323,"small":356},[324,332,333,341,349],{"id":325,"publish_date":326,"is_original":4,"collection":5,"cover_url":327,"cover_url_1_1":328,"title":329,"summary":330,"author":331},563,"2022-04-23","article_res/cover/3da28616745d4ce670a5cf6cd6b0ec1d.jpeg","article_res/cover/7679a257f47df7d71f36093076587e60.jpeg","GameFi Dashboard organization","This week, I translated an article introducing GameFi, and today I'll share some reports on GameFi. GameFi is known as a game with a financial core under its gaming appearance...","Learning and Sharing",{"id":21,"publish_date":22,"is_original":23,"collection":5,"cover_url":24,"cover_url_1_1":25,"title":26,"summary":27,"author":28},{"id":334,"publish_date":335,"is_original":4,"collection":5,"cover_url":336,"cover_url_1_1":337,"title":338,"summary":339,"author":340},570,"2022-04-16","article_res/cover/0e3abd5e8012a43838e36dd9c78671f7.jpeg","article_res/cover/a3c3af255bb3c07f731de4de77669fcd.jpeg","What is GameFi? An Introduction to Play-to-Earn and Blockchain Games","In August 2021, Axie Infinity became the first blockchain game to surpass $1 billion in total token sales.","Translation",{"id":342,"publish_date":343,"is_original":4,"collection":5,"cover_url":344,"cover_url_1_1":345,"title":346,"summary":347,"author":348},498,"2022-12-06","article_res/cover/1df4fc45208a83fe02c79808d4aa49cc.jpeg","article_res/cover/f14d52ccadcd0c206e5ccdbaeea7298f.jpeg","From monetary policy to long-term interests, overall interests","If all loans were repaid, there would be no bank deposits, and the whole currency circulation would dry up. - Robert H. Hemphill","Course Notes",{"id":350,"publish_date":351,"is_original":23,"collection":5,"cover_url":352,"cover_url_1_1":353,"title":354,"summary":355,"author":28},197,"2024-08-14","article_res/cover/22174743c1501f73c6b18fe0c76eaba5.jpeg","article_res/cover/a4056722ad71dc449138929b900a70fd.jpeg","MiniCPM, a GPT-4V-level multimodal language model running on mobile devices","Today I studied a project called MiniCPM, which is a GPT-4V-level multimodal language model (MLL) running on mobile phones.",[357,363,369],{"title":10,"list":358},[359,360,361,362],{"id":96,"publish_date":97,"is_original":23,"collection":98,"cover_url":99,"cover_url_1_1":100,"title":101,"summary":102,"author":28},{"id":104,"publish_date":105,"is_original":23,"collection":106,"cover_url":107,"cover_url_1_1":108,"title":109,"summary":110,"author":28},{"id":112,"publish_date":113,"is_original":23,"collection":114,"cover_url":115,"cover_url_1_1":116,"title":117,"summary":118,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"title":222,"list":364},[365,366,367,368],{"id":120,"publish_date":113,"is_original":23,"collection":121,"cover_url":122,"cover_url_1_1":123,"title":124,"summary":125,"author":28},{"id":166,"publish_date":167,"is_original":23,"collection":168,"cover_url":169,"cover_url_1_1":170,"title":171,"summary":172,"author":28},{"id":227,"publish_date":228,"is_original":23,"collection":229,"cover_url":230,"cover_url_1_1":231,"title":232,"summary":233,"author":28},{"id":235,"publish_date":236,"is_original":23,"collection":73,"cover_url":237,"cover_url_1_1":238,"title":239,"summary":240,"author":28},{"title":242,"list":370},[],[8,9,10],[8,12,13,14,9,10,15,16,17,18],["Reactive",245],1754646419516]