{ "cells": [ { "cell_type": "markdown", "id": "9c5c18a1", "metadata": {}, "source": [ "* Milvus Vector Database: https://milvus.io" ] }, { "cell_type": "markdown", "id": "2df4fa0b", "metadata": {}, "source": [ "# Libraries" ] }, { "cell_type": "code", "execution_count": 1, "id": "df27797f", "metadata": { "execution": { "iopub.execute_input": "2023-11-27T22:22:07.640281Z", "iopub.status.busy": "2023-11-27T22:22:07.639988Z", "iopub.status.idle": "2023-11-27T22:22:07.900025Z", "shell.execute_reply": "2023-11-27T22:22:07.899821Z", "shell.execute_reply.started": "2023-11-27T22:22:07.640258Z" }, "scrolled": true }, "outputs": [], "source": [ "from pymilvus import connections, Collection, utility" ] }, { "cell_type": "markdown", "id": "4876213b", "metadata": {}, "source": [ "# Connection and Collection" ] }, { "cell_type": "code", "execution_count": 2, "id": "092108a6", "metadata": { "execution": { "iopub.execute_input": "2023-11-27T22:22:07.900673Z", "iopub.status.busy": "2023-11-27T22:22:07.900543Z", "iopub.status.idle": "2023-11-27T22:22:07.912394Z", "shell.execute_reply": "2023-11-27T22:22:07.912186Z", "shell.execute_reply.started": "2023-11-27T22:22:07.900666Z" } }, "outputs": [], "source": [ "# Register the \"default\" connection alias against the Milvus server on localhost.\n", "connections.connect(\"default\", host=\"localhost\", port=\"19530\")\n", "\n", "# Bind to the \"llamalection\" collection (no schema is passed here, so it is\n", "# presumably expected to exist already - verify against the ingestion step).\n", "collection = Collection(\"llamalection\")\n", "\n", "# Milvus only serves query() on a collection that is loaded into memory.\n", "# Loading here keeps Restart & Run All independent of whatever loaded it before.\n", "collection.load()" ] }, { "cell_type": "markdown", "id": "6a2a2b8f", "metadata": {}, "source": [ "# Schema Definition" ] }, { "cell_type": "code", "execution_count": 3, "id": "baa95bb6", "metadata": { "execution": { "iopub.execute_input": "2023-11-27T22:22:07.912768Z", "iopub.status.busy": "2023-11-27T22:22:07.912680Z", "iopub.status.idle": "2023-11-27T22:22:07.914693Z", "shell.execute_reply": "2023-11-27T22:22:07.914152Z", "shell.execute_reply.started": "2023-11-27T22:22:07.912761Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'auto_id': False, 'description': '', 'fields': [{'name': 'id', 'description': '', 'type': , 'params': {'max_length': 
65535}, 'is_primary': True, 'auto_id': False}, {'name': 'embedding', 'description': '', 'type': , 'params': {'dim': 384}}], 'enable_dynamic_field': True}\n" ] } ], "source": [ "# Show the collection schema: its fields and their parameters.\n", "print(collection.schema)" ] }, { "cell_type": "markdown", "id": "2411948b-2a54-46e8-be1c-b3916c32dff8", "metadata": {}, "source": [ "# Embeddings Count" ] }, { "cell_type": "code", "execution_count": 4, "id": "957c6f1c-31b7-40e8-ab18-02069802fe70", "metadata": { "execution": { "iopub.execute_input": "2023-11-27T22:22:07.915022Z", "iopub.status.busy": "2023-11-27T22:22:07.914952Z", "iopub.status.idle": "2023-11-27T22:22:08.265218Z", "shell.execute_reply": "2023-11-27T22:22:08.263838Z", "shell.execute_reply.started": "2023-11-27T22:22:07.915015Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{'count(*)': 3}]\n" ] } ], "source": [ "# Empty filter expression plus count(*): ask Milvus for the number of stored\n", "# entities instead of the entities themselves.\n", "response = collection.query(expr=\"\", output_fields=[\"count(*)\"])\n", "\n", "print(response)" ] }, { "cell_type": "markdown", "id": "33f35de9-a15c-48ea-82c9-b4e95d9c8c25", "metadata": {}, "source": [ "# Embeddings Content" ] }, { "cell_type": "code", "execution_count": 5, "id": "c59696a9-a8f7-41c3-86d5-af2a80ea7fea", "metadata": { "execution": { "iopub.execute_input": "2023-11-27T22:22:08.267001Z", "iopub.status.busy": "2023-11-27T22:22:08.266667Z", "iopub.status.idle": "2023-11-27T22:22:08.665954Z", "shell.execute_reply": "2023-11-27T22:22:08.664748Z", "shell.execute_reply.started": "2023-11-27T22:22:08.266968Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'id': '1f3342ca-d98d-4217-b7d1-d6dc83baa36b', 'page_label': '1', 'file_name': 'Test.pdf', 'file_path': 'data/Test.pdf', 'file_type': 'application/pdf', 'file_size': 117715, 'creation_date': '2023-11-27', 'last_modified_date': '2023-11-10', 'last_accessed_date': '2023-11-27', '_node_content': '{\"id_\": \"1f3342ca-d98d-4217-b7d1-d6dc83baa36b\", \"embedding\": null, \"metadata\": {\"page_label\": \"1\", \"file_name\": \"Test.pdf\", 
\"file_path\": \"data/Test.pdf\", \"file_type\": \"application/pdf\", \"file_size\": 117715, \"creation_date\": \"2023-11-27\", \"last_modified_date\": \"2023-11-10\", \"last_accessed_date\": \"2023-11-27\"}, \"excluded_embed_metadata_keys\": [\"file_name\", \"file_type\", \"file_size\", \"creation_date\", \"last_modified_date\", \"last_accessed_date\"], \"excluded_llm_metadata_keys\": [\"file_name\", \"file_type\", \"file_size\", \"creation_date\", \"last_modified_date\", \"last_accessed_date\"], \"relationships\": {\"1\": {\"node_id\": \"2adcbbb1-318c-4c39-86dc-927fced21621\", \"node_type\": \"4\", \"metadata\": {\"page_label\": \"1\", \"file_name\": \"Test.pdf\", \"file_path\": \"data/Test.pdf\", \"file_type\": \"application/pdf\", \"file_size\": 117715, \"creation_date\": \"2023-11-27\", \"last_modified_date\": \"2023-11-10\", \"last_accessed_date\": \"2023-11-27\"}, \"hash\": \"0319c9749f2cd7d3b47db2abb4865dd9573a8a646d3b69718fa97a9664e9b3cc\", \"class_name\": \"RelatedNodeInfo\"}, \"3\": {\"node_id\": \"596b3b27-9546-49b4-aae9-1987ce11e099\", \"node_type\": \"1\", \"metadata\": {\"page_label\": \"2\", \"file_name\": \"Test.pdf\", \"file_path\": \"data/Test.pdf\", \"file_type\": \"application/pdf\", \"file_size\": 117715, \"creation_date\": \"2023-11-27\", \"last_modified_date\": \"2023-11-10\", \"last_accessed_date\": \"2023-11-27\"}, \"hash\": \"d60444efb8d6528f86ee1280f9c5199792e2bf1b88d3c61ee201ad9c15d1c53a\", \"class_name\": \"RelatedNodeInfo\"}}, \"hash\": \"73ab5e9358a81f89bec25431e29450806f4d0d12546e84b3484938ff8dacb605\", \"text\": \"Simple\\\\nLabel Data\\\\ndate|closing|MMMM Junedate|closing|MMMM D, YYYY June 13, 2023date|closing|YYYY 2023date|closing_7|MMMM D, YYYY June 20, 2023date|generation 2023-11-01T15:46:55+00:00date|policy|M/D/YY 6/13/23accounting|k|sum $1,013,940.00accounting|k|description Sales Price of Propertyaccounting|g3|amount $297.42accounting|g3|description Property Taxesaccounting|loan_amount|amount 
$500,000.00accounting|owners_endorsements|amount $614.00accounting|property|sum $1,000,000.00accounting|purchase_price|amount $500,000.00order|number 2022/11/22/00598party|address 525 Ivy Street Houston 77074 TX US, 7336 Hilldale Drive El Paso 79927 TX US, 136 Myron Wells 15769 TX US, 5465 Pfeffer Loaf 25437 TX US, 0139 Mona Stream 66196 TX US, 0299 Hansen Canyon 76934 TX US, 50772 Paige Plains 36897 TX US, 74981 JaedenKeys 22885 TX US, 473 Larkin Divide 06961 TX US, 5678 Howe Corners 25891 TX US and 250 Heller Brooks 94281 TX USparty|email jdoe@yahoo.com.mx, Broderick_Flatley@gmail.com, Maxwell37@gmail.com, Ayana_Schamberger37@gmail.com, Albin.Moore76@yahoo.com, Noel20@gmail.com andpe5vfhvu@grainchain.devparty|name Jane Doe, Jamie Carter B., Chadrick Wilkinson C. and Zelda D\\'Amore B.party|name|initials JD, JCB, CWC and ZDABparty|name|uppercase JANE DOE, JAMIE CARTER B., CHADRICK WILKINSON C. AND ZELDA D\\'AMORE B.party|name_buyer Jane Doe and Zelda D\\'Amore B.party|name_settlement_agency Unknownparty|parent_settlement_agency Unknownparty|phone 2149180561, 264-304-6342 x671, 1-716-843-3447 x22419, (610) 995-6558 x3284 and 796.945.4565party|ssn_settlement_agency Unknownproperty|address 7604 Pumpkin Hill Court Cedar Park Unknown 78613 TX US and 64 Mountainview Street San Antonio Unknown 78233 TX USproperty|address|lowercase 7604 pumpkin hill court cedar park unknown 78613 tx us and 64 mountainview street san antonio unknown 78233 tx usproperty|county Cedar Park and San Antonioproperty|legal_description Legal description for 7604 Pumpkin Hill Court and Legal description for 64 Mountainview Streetproperty|loan_amount $200,000.00,$300,000.00property|purchase_price $200,000.00,$300,000.00\\\\nImages\", \"start_char_idx\": null, \"end_char_idx\": null, \"text_template\": \"{metadata_str}\\\\n\\\\n{content}\", \"metadata_template\": \"{key}: {value}\", \"metadata_seperator\": \"\\\\n\", \"class_name\": \"TextNode\"}', '_node_type': 'TextNode', 'document_id': 
'2adcbbb1-318c-4c39-86dc-927fced21621', 'doc_id': '2adcbbb1-318c-4c39-86dc-927fced21621', 'ref_doc_id': '2adcbbb1-318c-4c39-86dc-927fced21621', 'embedding': [0.031603746, -0.07105771, 0.028062077, 0.01079095, 0.011095744, 0.010383652, -0.038875945, 0.039699517, -0.016333003, -0.021024285, 0.06426585, -0.019446954, 0.0057799993, -0.044637337, -0.035930593, -0.0050184187, -0.011890076, -0.0064755245, 0.021297643, 0.022488331, 0.07081364, -0.047656506, 0.0042520044, 0.025551526, 0.06900463, 0.02023365, 0.023971526, -0.022157399, -0.057475496, -0.13729337, 0.050558135, -0.02353167, 0.036993433, -0.045911077, 0.04123027, -0.0028286027, -0.057179082, -4.6172216e-05, 0.0030029987, -0.01114006, 0.022735285, 0.012543171, 0.011364902, 0.026045064, 0.0039626085, -0.01235674, -0.037236605, 0.016728228, 0.017501185, 0.044898793, -0.05007796, -0.0067632226, -0.009312568, 0.040948085, -0.022811355, 0.060638335, 0.035593927, 0.005057315, 0.018891586, 0.04957502, 0.0036701856, 0.001472435, -0.21157175, 0.027447425, 0.0025616484, -0.017939072, 0.00398365, 0.011011481, 6.155316e-05, 0.03164784, 0.01437163, 0.0029798392, -0.013363528, 0.009087859, 0.013407063, -0.07115047, 0.031486522, 0.030861972, -0.014954134, -0.034231655, -0.06731773, -0.0047710524, 0.026920317, -0.06011694, -0.042831626, -0.024000704, 0.032006286, 0.037798893, 0.009131962, -0.0056897793, 0.0094264895, -0.022514008, -0.028282462, 0.028364303, -0.10804796, -0.00673411, 0.017454373, 0.029053397, -0.007319414, 0.42983827, -0.038127176, 0.044108965, 0.007388164, -0.04573997, 0.0063601774, -0.00012119955, -0.028029319, -0.028636152, -0.029535646, -0.023401372, 0.01661981, -0.022507397, 0.04828101, -0.09911933, 0.011494717, 0.01428024, 0.053057663, -0.01562986, 0.06327042, -0.061784428, 0.06091552, 0.053465907, 0.019319635, 0.021268344, -0.024173776, -0.066615015, 0.01597774, 0.09272004, -0.01464117, 0.058839515, 0.021428423, 0.0048763715, -0.07740598, 0.006285363, -0.021318816, -0.00033027225, -0.020901153, 
0.006762184, 0.0132719, -0.0035061478, -0.060103316, -0.039823886, -0.050108105, -0.048508033, -0.035085898, 0.09731442, 0.013417826, -0.06291131, 0.0015205704, -0.010948397, 0.028770208, 0.07464126, -0.025664544, -0.023197442, 0.013971152, 0.05119616, 0.04128398, -0.008261137, -0.03634625, 0.05084613, 0.062605485, -0.011832489, -0.01341752, 0.06708495, 0.034817748, -0.17495346, -0.02021428, 0.043113735, -0.038992625, -0.043838456, 0.04330636, 0.027466249, -0.042054206, 0.0020421932, 0.056919582, 0.012898488, 0.027558574, -0.01933272, -0.0052027316, 0.019286752, -0.067501694, 0.061463013, 0.002926764, 0.006591596, -0.0077835256, 0.005566575, -0.040339094, 0.008054091, -0.022655914, 0.011935011, 0.0009768049, -0.03123888, -0.047911596, 0.014489832, -0.027821371, 0.0018171779, -0.026596993, 0.008386415, 0.077239715, -0.01153633, 0.04912617, 0.0030492702, -0.038827203, 0.03864512, 0.04201656, 0.044312634, 0.052643307, 0.02948877, 0.028723082, 0.013752327, 0.017523296, 0.060947035, -0.008370889, -0.05209136, -0.0056532896, 0.039850496, -0.0062729544, 0.03568069, 0.026387792, 0.044098906, -0.03377718, -0.040784378, 0.0035555235, -0.35138077, -0.00064172695, 0.00010503744, -0.019710852, -0.021865064, -0.04729819, 0.033722185, 0.037615143, 0.003959352, 0.101651095, 0.039250247, -0.009958315, -0.0575688, -0.019927103, -0.008648497, -0.05310667, 0.022185022, -0.020212375, 0.001192273, -0.01897671, 0.006459592, -0.051920857, -0.041568067, -0.010434363, 0.034598928, -0.04920203, 0.070777416, 0.0035710535, -0.026978785, -0.026430119, -0.0027473466, -0.002955941, -0.05677881, -0.040428415, 0.049419843, -0.0041644466, -0.0036892153, 0.035393994, -0.046790347, -0.06350051, -0.06259475, -0.017706148, 0.007129924, -0.026467742, 0.008198328, -0.029256515, -0.0021515067, 0.007594517, -0.013796461, 0.022118459, 0.017421514, -0.0143991895, 0.04402768, 0.00037755255, 0.06761231, -0.041920498, 0.0027303686, 0.0066755703, -0.024536598, -0.034385107, 0.042137396, -0.024778735, 0.050504357, 
0.035176065, 0.014480239, -0.067165345, -0.04487998, -0.047411755, -0.06753275, 0.001995838, -0.013494676, -0.00558712, -0.0076402263, -0.023463668, 0.0012424858, 0.036630213, -0.05094216, -0.028264685, -0.015748367, -0.015327221, 0.051345497, -0.021171514, 0.07369023, -0.0023123536, -0.013471077, 0.08430794, 0.0070863934, -0.009420064, 0.049907584, -0.0034562824, -0.030013897, -0.005445252, 0.022521064, -0.0060797534, 0.016607488, -0.025006155, -0.28446126, -0.012456576, -0.07219983, 0.06345294, 0.01629345, 0.05447814, 0.013837751, 0.02088586, -0.016935032, 0.02720019, 0.027799452, 0.08370593, 0.024795592, -0.067219615, -0.0056322287, 0.0048109335, 0.028592844, 0.009245262, 0.030941082, 0.06384667, 0.033665102, -0.04717687, 0.16195796, 0.027872311, -0.027545676, -0.0017133226, -0.0054567014, 0.060579486, 0.092567995, -0.020373935, 0.013653342, 0.019456264, 0.08872532, -0.031493194, 0.00574812, 0.0009891393, 0.0035447318, -0.027879016, -0.00824295, 0.0317103, -0.013871338, -0.004124056, -0.044094976, 0.016563132, 0.054339536, 0.020703502, -0.03292528, -0.02537162, -0.02212617, 0.045299098, -0.007619904, 0.00674115, -0.01820146, -0.019805728, 0.003312709, 0.040897917, -0.0022292437, 0.018349096, 0.0032490853, -0.0054947403, -0.048012644, 0.029179854, 0.024341997, -0.051024277, -0.00043805272]}\n" ] } ], "source": [ "# Fetch a single entity to inspect what is stored alongside each embedding.\n", "# \"*\" asks Milvus for all fields; the explicitly named fields are kept as-is.\n", "content_fields = [\"id\", \"file_path\", \"page_label\", \"*\"]\n", "response = collection.query(expr=\"\", limit=1, output_fields=content_fields)\n", "\n", "for entity in response:\n", "    print(entity)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6" } }, "nbformat": 4, "nbformat_minor": 5 }