How to perform vector search with C#/.NET?

Last updated 20, Apr 2024

Goal

Understanding how to perform vector search using Redis as the vector database in the C# programming language on the .NET platform.

Solution

The following example models sentences as vector embeddings, stores them in Redis as the vector database, and queries them using the NRedisStack client library for the .NET programming language. Before starting with the example, learn how to set up a C#/.NET project to use Redis as the vector database.

Install the following libraries, required by the example discussed in this document.

cd vector-test

dotnet add package NRedisStack
dotnet add package Microsoft.ML

Now edit the Program.cs file in the project folder and paste this content:

using NRedisStack;
using NRedisStack.RedisStackCommands;
using NRedisStack.Search;
using NRedisStack.Search.Aggregation;
using NRedisStack.Search.Literals.Enums;
using StackExchange.Redis;
using static NRedisStack.Search.Schema;

using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Transforms.Text;
using System.Text.Json;


namespace Redis.SemanticSearch
{
    /// <summary>
    /// Minimal vector similarity search example: creates a RediSearch index over
    /// hashes, stores three sentences together with their ML.NET embeddings, and
    /// runs a KNN query for a test sentence.
    /// </summary>
    public static class VssExample
    {
        // "localhost" and "localhost:6379" name the same endpoint (6379 is the default
        // port), so a single constant is used everywhere.
        private const string RedisEndpoint = "localhost:6379";
        private const string IndexName = "vector_idx";
        private const string KeyPrefix = "doc:";

        // ConnectionMultiplexer is meant to be created once and shared; opening one
        // per method wastes sockets and leaks them when they are never disposed.
        private static readonly Lazy<ConnectionMultiplexer> s_redis =
            new Lazy<ConnectionMultiplexer>(() => ConnectionMultiplexer.Connect(RedisEndpoint));

        // Building the prediction engine loads (and on first use downloads) the
        // pretrained embedding model, so it is built at most once per process.
        private static readonly Lazy<PredictionEngine<TextData, TransformedTextData>> s_predictionEngine =
            new Lazy<PredictionEngine<TextData, TransformedTextData>>(GetPredictionEngine);

        /// <summary>
        /// Entry point: creates the index, stores the sample sentences, runs the query,
        /// and finally releases the shared Redis connection and prediction engine.
        /// </summary>
        private static void Main()
        {
            try
            {
                CreateIndex();
                ModelSentences();
                TestSentence();
            }
            finally
            {
                if (s_predictionEngine.IsValueCreated)
                {
                    s_predictionEngine.Value.Dispose();
                }

                if (s_redis.IsValueCreated)
                {
                    s_redis.Value.Dispose();
                }
            }
        }

        /// <summary>
        /// Creates the <c>vector_idx</c> index over hashes whose key starts with
        /// <c>doc:</c>: a text field, a tag field and an HNSW FLOAT32 vector field.
        /// </summary>
        private static void CreateIndex()
        {
            IDatabase db = s_redis.Value.GetDatabase();

            var schema = new Schema()
                .AddTextField(new FieldName("content", "content"))
                .AddTagField(new FieldName("genre", "genre"))
                .AddVectorField("embedding", VectorField.VectorAlgo.HNSW,
                    new Dictionary<string, object>()
                    {
                        ["TYPE"] = "FLOAT32",
                        // Must match the length of the vectors produced by GetEmbedding.
                        ["DIM"] = "150",
                        ["DISTANCE_METRIC"] = "L2"
                    });

            SearchCommands ft = db.FT();
            try
            {
                ft.Create(
                    IndexName,
                    new FTCreateParams().On(IndexDataType.HASH).Prefix(KeyPrefix),
                    schema);
            }
            catch (RedisServerException ex)
                when (ex.Message.IndexOf("Index already exists", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                // The index survives between runs; re-running the example should not
                // fail just because a previous run already created it.
                // NOTE(review): an existing index with a different schema is kept as-is.
            }
        }

        /// <summary>
        /// Converts a sentence into the binary blob stored in (and queried against)
        /// the <c>embedding</c> field.
        /// </summary>
        /// <param name="model">Prediction engine that maps text to a feature vector.</param>
        /// <param name="sentence">Sentence to embed.</param>
        /// <returns>The raw bytes of the predicted <c>float[]</c> feature vector.</returns>
        private static byte[] GetEmbedding(PredictionEngine<TextData, TransformedTextData> model, string sentence)
        {
            // Call the prediction API to convert the text into an embedding vector.
            var prediction = model.Predict(new TextData { Text = sentence });

            // Features is already a float[], so it can be copied directly into the blob.
            // Buffer.BlockCopy copies the in-memory representation (host byte order).
            // NOTE(review): assumes a little-endian host, matching what Redis expects — confirm.
            float[] features = prediction.Features;
            byte[] blob = new byte[features.Length * sizeof(float)];
            Buffer.BlockCopy(features, 0, blob, 0, blob.Length);

            return blob;
        }

        /// <summary>
        /// Builds the ML.NET pipeline (normalize, tokenize, pretrained word embedding)
        /// and wraps it in a prediction engine.
        /// </summary>
        /// <returns>An engine that turns <see cref="TextData"/> into <see cref="TransformedTextData"/>.</returns>
        private static PredictionEngine<TextData, TransformedTextData> GetPredictionEngine()
        {
            // Create a new ML context, for ML.NET operations. It can be used for
            // exception tracking and logging, as well as the source of randomness.
            var mlContext = new MLContext();

            // The pretrained embedding needs no training data, so the pipeline is
            // fitted against an empty data view just to obtain a transformer.
            var emptyDataView = mlContext.Data.LoadFromEnumerable(new List<TextData>());

            // A pipeline for converting text into a 150-dimension embedding vector
            // (per the ML.NET docs the output concatenates the min, average and max
            // of the per-word vectors; the sentiment-specific model is 50-dimensional).
            var textPipeline = mlContext.Transforms.Text.NormalizeText("Text")
                .Append(mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text"))
                .Append(mlContext.Transforms.Text.ApplyWordEmbedding(
                    "Features",
                    "Tokens",
                    WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding));

            var textTransformer = textPipeline.Fit(emptyDataView);

            // Create the prediction engine to get the embedding vector from the input text.
            return mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer);
        }

        /// <summary>
        /// Stores the three sample sentences as hashes <c>doc:1</c> .. <c>doc:3</c>,
        /// each with its content, genre and embedding.
        /// </summary>
        public static void ModelSentences()
        {
            IDatabase db = s_redis.Value.GetDatabase();
            var predictionEngine = s_predictionEngine.Value;

            StoreSentence(db, predictionEngine, KeyPrefix + "1", "That is a very happy person", "persons");
            StoreSentence(db, predictionEngine, KeyPrefix + "2", "That is a happy dog", "pets");
            StoreSentence(db, predictionEngine, KeyPrefix + "3", "Today is a sunny day", "weather");
        }

        // Writes one sentence, its genre and its embedding as a single Redis hash.
        private static void StoreSentence(
            IDatabase db,
            PredictionEngine<TextData, TransformedTextData> predictionEngine,
            string key,
            string content,
            string genre)
        {
            var fields = new HashEntry[]
            {
                new HashEntry("content", content),
                new HashEntry("genre", genre),
                new HashEntry("embedding", GetEmbedding(predictionEngine, content)),
            };
            db.HashSet(key, fields);
        }

        /// <summary>
        /// Runs a KNN 3 query for "That is a happy person" against the index and
        /// prints each hit's id followed by its returned property values.
        /// </summary>
        private static void TestSentence()
        {
            IDatabase db = s_redis.Value.GetDatabase();
            var predictionEngine = s_predictionEngine.Value;

            // DIALECT 2 is required for the "=>[KNN ...]" syntax and query parameters.
            var query = new Query("*=>[KNN 3 @embedding $query_vec AS score]")
                .AddParam("query_vec", GetEmbedding(predictionEngine, "That is a happy person"))
                .ReturnFields(new FieldName("content", "content"), new FieldName("score", "score"))
                .SetSortBy("score")
                .Dialect(2);

            SearchCommands ft = db.FT();
            var res = ft.Search(IndexName, query);

            foreach (var doc in res.Documents)
            {
                Console.Write($"id: {doc.Id}, ");
                foreach (var item in doc.GetProperties())
                {
                    Console.Write($" {item.Value}");
                }
                Console.WriteLine();
            }
        }

        // Input row for the ML.NET pipeline; the "Text" column name is referenced by the pipeline.
        private class TextData
        {
            public string Text { get; set; } = string.Empty;
        }

        // Output row: the input text plus the "Features" column produced by the pipeline.
        private class TransformedTextData : TextData
        {
            public float[] Features { get; set; } = Array.Empty<float>();
        }
    }
}

You can now execute the project:

dotnet run

Note that the example may appear to hang the first time it is executed: it is downloading the embedding model, which takes some time.

The example stores three sentences ("That is a very happy person", "That is a happy dog", "Today is a sunny day") as Redis hashes and then ranks them by their similarity to the test sentence "That is a happy person". Vector search is configured to return three results (KNN 3), sorted by ascending distance; as expected, the smallest distance corresponds to the stored sentence that is semantically closest to the test sentence.

id: doc:1,  4.30777168274 That is a very happy person
id: doc:2,  25.9752807617 That is a happy dog
id: doc:3,  68.8638000488 Today is a sunny day

References

Redis resources to learn about programming with C#/.NET

  • C#/.NET guide, a quick start guide to working with NRedisStack
  • NRedisStack, the Redis client for the .NET programming language

More about ML models