微信扫码
添加专属顾问
我要投稿
Gemini Embedding 2实现多模态统一向量表示,但多向量检索技术仍有不可替代的价值。核心内容: 1. Gemini Embedding 2的核心突破:全模态原生支持与灵活尺寸 2. 多向量检索技术的独特价值与适用场景 3. 统一表示与多向量检索的互补关系
https://milvus.io/docs/hybrid_search_with_milvus.md
"""Prerequisites:pip install google-genai pymilvusSet environment variable:export GOOGLE_API_KEY="your-api-key""""import osimport structimport numpy as npfrom google import genaifrom google.genai import typesfrom pymilvus import MilvusClient, DataType# ── Config ───────────────────────────────────────────────────────────────COLLECTION_NAME = "gemini_multimodal_demo"MILVUS_URI = "http://localhost:19530" # Change to your Milvus addressDIM = 3072 # gemini-embedding-2-preview output dimensionGEMINI_MODEL = "gemini-embedding-2-preview"# ── Initialize clients ──────────────────────────────────────────────────gemini_client = genai.Client() # Uses GOOGLE_API_KEY env varmilvus_client = MilvusClient(MILVUS_URI)# ── Helper: generate embedding ──────────────────────────────────────────def embed_texts(texts: list[str], task_type: str = "SEMANTIC_SIMILARITY") -> list[list[float]]:"""Embed a list of text strings."""result = gemini_client.models.embed_content(model=GEMINI_MODEL,contents=texts,config=types.EmbedContentConfig(task_type=task_type),)return [e.values for e in result.embeddings]def embed_image(image_path: str) -> list[float]:"""Embed an image file."""with open(image_path, "rb") as f:image_bytes = f.read()mime = "image/png" if image_path.endswith(".png") else "image/jpeg"result = gemini_client.models.embed_content(model=GEMINI_MODEL,contents=types.Part.from_bytes(data=image_bytes, mime_type=mime),)return result.embeddings[0].valuesdef embed_audio(audio_path: str) -> list[float]:"""Embed an audio file."""with open(audio_path, "rb") as f:audio_bytes = f.read()mime_map = {".mp3": "audio/mpeg", ".wav": "audio/wav", ".flac": "audio/flac"}ext = os.path.splitext(audio_path)[1].lower()mime = mime_map.get(ext, "audio/mpeg")result = gemini_client.models.embed_content(model=GEMINI_MODEL,contents=types.Part.from_bytes(data=audio_bytes, mime_type=mime),)return result.embeddings[0].values# ── 1. 
# ── 1. Create Milvus collection ─────────────────────────────────────────
print("=== Creating collection ===")

# Start from a clean slate: drop any leftover collection with the same name.
if milvus_client.has_collection(COLLECTION_NAME):
    milvus_client.drop_collection(COLLECTION_NAME)

schema = milvus_client.create_schema()
schema.add_field("id", DataType.INT64, is_primary=True, auto_id=True)
schema.add_field("content", DataType.VARCHAR, max_length=2000)  # description of the content
schema.add_field("modality", DataType.VARCHAR, max_length=20)   # "text", "image", "audio"
schema.add_field("vector", DataType.FLOAT_VECTOR, dim=DIM)

# Cosine similarity with an auto-selected index type.
index_params = milvus_client.prepare_index_params()
index_params.add_index(
    field_name="vector",
    index_type="AUTOINDEX",
    metric_type="COSINE",
)

milvus_client.create_collection(
    COLLECTION_NAME,
    schema=schema,
    index_params=index_params,
    consistency_level="Strong",  # make inserts immediately searchable
)
print(f"Collection '{COLLECTION_NAME}' created (dim={DIM}, metric=COSINE)")

# ── 2. Insert text embeddings ───────────────────────────────────────────
print("\n=== Inserting text embeddings ===")
documents = [
    "Artificial intelligence was founded as an academic discipline in 1956.",
    "The Mona Lisa is a half-length portrait painting by Leonardo da Vinci.",
    "Beethoven's Symphony No. 9 premiered in Vienna on May 7, 1824.",
    "The Great Wall of China stretches over 13,000 miles across northern China.",
    "Jazz music originated in the African-American communities of New Orleans.",
    "The Hubble Space Telescope was launched into orbit on April 24, 1990.",
    "Vincent van Gogh painted The Starry Night while in an asylum in Saint-Rémy.",
    "Machine learning is a subset of AI focused on learning from data.",
]

# One embedding per document, paired back with its source text for insert.
text_vectors = embed_texts(documents)
text_rows = [
    {"content": doc, "modality": "text", "vector": vec}
    for doc, vec in zip(documents, text_vectors)
]
milvus_client.insert(COLLECTION_NAME, text_rows)
print(f"Inserted {len(text_rows)} text documents")
# ── 3. (Optional) Insert image embeddings ───────────────────────────────
# Uncomment and provide real image paths to test multimodal search
#
# image_files = [
#     ("photo of the Mona Lisa painting", "mona_lisa.jpg"),
#     ("satellite photo of the Great Wall of China", "great_wall.png"),
# ]
# for desc, path in image_files:
#     if os.path.exists(path):
#         vec = embed_image(path)
#         milvus_client.insert(COLLECTION_NAME, [
#             {"content": desc, "modality": "image", "vector": vec}
#         ])
#         print(f"Inserted image: {desc}")

# ── 4. (Optional) Insert audio embeddings ───────────────────────────────
# Uncomment and provide real audio paths to test multimodal search
#
# audio_files = [
#     ("Beethoven Symphony No.9 excerpt", "beethoven_9.mp3"),
#     ("jazz piano improvisation", "jazz_piano.mp3"),
# ]
# for desc, path in audio_files:
#     if os.path.exists(path):
#         vec = embed_audio(path)
#         milvus_client.insert(COLLECTION_NAME, [
#             {"content": desc, "modality": "audio", "vector": vec}
#         ])
#         print(f"Inserted audio: {desc}")

# ── 5. Search ────────────────────────────────────────────────────────────
print("\n=== Searching ===")
queries = [
    "history of artificial intelligence",
    "famous Renaissance paintings",
    "classical music concerts",
]
query_vectors = embed_texts(queries, task_type="SEMANTIC_SIMILARITY")

# Run each query independently and show the top-3 hits with their modality.
for query_text, query_vec in zip(queries, query_vectors):
    results = milvus_client.search(
        COLLECTION_NAME,
        data=[query_vec],
        limit=3,
        output_fields=["content", "modality"],
        search_params={"metric_type": "COSINE"},
    )
    print(f"\nQuery: '{query_text}'")
    for hits in results:
        for rank, hit in enumerate(hits, 1):
            print(
                f" [{rank}] (score={hit['distance']:.4f}, modality={hit['entity']['modality']}) "
                f"{hit['entity']['content'][:80]}"
            )
# ── 6. Cross-modal search example (image query -> text results) ─────────
# Uncomment to search text collection using an image as query
#
# print("\n=== Cross-modal search: image -> text ===")
# query_image_vec = embed_image("query_image.jpg")
# results = milvus_client.search(
#     COLLECTION_NAME,
#     data=[query_image_vec],
#     limit=3,
#     output_fields=["content", "modality"],
#     search_params={"metric_type": "COSINE"},
# )
# for hits in results:
#     for rank, hit in enumerate(hits, 1):
#         print(f" [{rank}] (score={hit['distance']:.4f}) {hit['entity']['content'][:80]}")

# ── Cleanup ──────────────────────────────────────────────────────────────
# milvus_client.drop_collection(COLLECTION_NAME)
# print(f"\nCollection '{COLLECTION_NAME}' dropped")

print("\nDone!")
阅读推荐 Embedding相似度虚高,如何用langchain+Milvus搭建CRAG解决? 80%的 Multi-Agent都是伪需求!如何判断是否需要Multi-Agent,以及如何搭? 养虾实战教程:我用OpenClaw做了个能盯盘,也能深度复盘的投资agent Qwen3.5-397B+Milvus+ColQwen2,如何做基于PDF的多模态RAG知识库 开源|Milvus2.6又有功能上新啦!Embedding Function、N-gram、decay ranker、field-level boosting、Highlighting解读
53AI,企业落地大模型首选服务商
产品:场景落地咨询+大模型应用平台+行业解决方案
承诺:免费POC验证,效果达标后再合作。零风险落地应用大模型,已交付160+中大型企业
2026-03-11
Gemini Embedding 2:首个原生五模态 embedding 模型
2026-03-11
Google 发布首个全模态 Embedding 2 模型,文本图片音视频 PDF 统一到一个向量空间
2026-03-11
谷歌首个原生多模态向量模型发布:Agent 可以用文字搜图片、用图片搜视频了...
2026-03-05
零帧起手 Codex × Figma 双向工作流实操
2026-02-27
NanoBanana 2.0 来了, 对比前一代和即梦 5.0 lite,它依旧强的离谱
2026-02-25
AI真人数字人语音对话性能优化实践总结
2026-02-13
“思考”更深,生成更准|Seedream 5.0 Lite 发布
2026-02-12
Seedance 2.0上线火山方舟体验中心,API即将开放
2025-12-15
2026-01-10
2025-12-17
2026-01-05
2025-12-14
2026-02-12
2026-01-27
2025-12-17
2026-01-16
2026-02-12
2026-03-12
2025-12-31
2025-08-04
2025-05-26
2025-05-13
2025-04-08
2025-04-05
2025-03-30