diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/answer_length_evaluator/answer_length_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/answer_length_evaluator/answer_length_evaluator.py index 8ff7f07b85af..0e6e564cdef1 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/answer_length_evaluator/answer_length_evaluator.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/answer_length_evaluator/answer_length_evaluator.py @@ -2,13 +2,12 @@ class AnswerLengthEvaluator: - def __init__(self, *, config: str, threshold, **kwargs): - self.config = config - self.threshold = threshold + def __init__(self, **kwargs): + pass def __call__(self, *args, **kwargs): return { - "result": evaluate_answer_length(kwargs.get("response")), + "score": evaluate_answer_length(kwargs.get("response")), } diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/friendly_evaluator/common_util/util.py b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/friendly_evaluator/common_util/util.py index aa137276e55c..6b0da0fec4cc 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/friendly_evaluator/common_util/util.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/friendly_evaluator/common_util/util.py @@ -12,44 +12,52 @@ You MUST respond in the following JSON format only: { "score": , - "label": "", "reason": "", - "explanation": "" + "explanation": "", + "tone": "", + "confidence": "" } - -A score of 3 or above is considered "Pass", below 3 is "Fail". """ -def build_evaluation_messages(query: str, response: str) -> list: - """Build the messages list for the LLM evaluation call. +def build_evaluation_instructions() -> str: + """Return the system instructions for the LLM evaluation call. + + :return: The system prompt string for the Responses API. 
+ """ + return FRIENDLINESS_SYSTEM_PROMPT + + +def build_evaluation_input(query: str, response: str) -> str: + """Build the user input for the LLM evaluation call. :param query: The original user query. :param response: The response to evaluate for friendliness. - :return: A list of message dicts for the chat completion API. + :return: A string prompt for the Responses API. """ - return [ - {"role": "system", "content": FRIENDLINESS_SYSTEM_PROMPT}, - { - "role": "user", - "content": ( - f"Please evaluate the friendliness of the following response.\n\n" - f"Original query: {query}\n\n" - f"Response to evaluate: {response}" - ), - }, - ] + return ( + f"Please evaluate the friendliness of the following response.\n\n" + f"Original query: {query}\n\n" + f"Response to evaluate: {response}" + ) def parse_evaluation_result(raw_result: str, threshold: int = 3) -> dict: """Parse the LLM's JSON response into a structured evaluation result. + The return dict has the standard top-level keys (score, label, reason, + threshold, passed) and a ``properties`` dict for any extra output fields + the evaluator wants to surface. + :param raw_result: The raw string output from the LLM. :param threshold: The minimum score to be considered "Pass". - :return: A dict with score, label, reason, and explanation. + :return: A dict with score, label, reason, threshold, passed, and properties. 
""" import json + # Keys that are promoted to the top level of the result + top_level_keys = {"score", "label", "reason"} + try: # Try to extract JSON from the response (handle markdown code blocks) text = raw_result.strip() @@ -57,17 +65,25 @@ def parse_evaluation_result(raw_result: str, threshold: int = 3) -> dict: text = text.split("\n", 1)[1] if "\n" in text else text[3:] text = text.rsplit("```", 1)[0] result = json.loads(text.strip()) - score = int(result.get("score", threshold)) + score = max(1, min(5, int(result.get("score", threshold)))) + passed = score >= threshold + + # Collect any extra fields returned by the LLM into properties + properties = {k: v for k, v in result.items() if k not in top_level_keys} + return { - "score": max(1, min(5, score)), - "label": result.get("label", "Pass" if score >= threshold else "Fail"), + "score": score, + "label": "Pass" if passed else "Fail", "reason": result.get("reason", "No reason provided"), - "explanation": result.get("explanation", "No explanation provided"), + "threshold": threshold, + "passed": passed, + "properties": properties, # extra metadata surfaced in the evaluation results } except (json.JSONDecodeError, ValueError, KeyError): return { "score": threshold, - "label": "Pass", + "label": "Fail", "reason": "Could not parse LLM response", - "explanation": f"Raw LLM output: {raw_result}", + "threshold": threshold, + "passed": False, } diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/friendly_evaluator/friendly_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/friendly_evaluator/friendly_evaluator.py index 730237af61f5..a72ea35988c6 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/friendly_evaluator/friendly_evaluator.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/custom_evaluators/friendly_evaluator/friendly_evaluator.py @@ -1,66 +1,42 @@ """Custom evaluator that uses an LLM to assess the friendliness of a response.""" 
-from openai import AzureOpenAI -from common_util.util import build_evaluation_messages, parse_evaluation_result +from openai import OpenAI +from common_util.util import build_evaluation_instructions, build_evaluation_input, parse_evaluation_result class FriendlyEvaluator: """Evaluates how friendly and approachable a response is using an LLM judge. - This evaluator sends the query and response to an LLM, which returns a - friendliness score (1-5), a pass/fail label, a reason, and a detailed explanation. + This evaluator sends the query and response to an LLM via the OpenAI Responses + API, which returns a friendliness score (1-5), a pass/fail label, a reason, + and a detailed explanation. - :param model_config: A dict containing Azure OpenAI connection info. Expected keys: - - azure_endpoint: The Azure OpenAI endpoint URL. - - azure_deployment: The deployment/model name. - - api_version: The API version (default: "2024-06-01"). - - api_key: (Optional) The API key. If not provided, DefaultAzureCredential is used. + :param api_key: The OpenAI API key. + :param model_name: The model_name to use for evaluation (e.g. "gpt-4o"). :param threshold: The minimum score (1-5) to be considered "Pass" (default: 3). 
""" - def __init__(self, *, model_config: dict, threshold: int = 3, **kwargs): - self.model_config = model_config + def __init__(self, *, api_key: str, model_name: str, threshold: int = 3, **kwargs): + self.client = OpenAI(api_key=api_key) + self.model_name = model_name self.threshold = threshold - api_key = model_config.get("api_key") - - if api_key: - self.client = AzureOpenAI( - azure_endpoint=model_config["azure_endpoint"], - api_key=api_key, - api_version=model_config.get("api_version", "2024-06-01"), - ) - else: - from azure.identity import DefaultAzureCredential, get_bearer_token_provider - - token_provider = get_bearer_token_provider( - DefaultAzureCredential(), - "https://cognitiveservices.azure.com/.default", - ) - self.client = AzureOpenAI( - azure_endpoint=model_config["azure_endpoint"], - azure_ad_token_provider=token_provider, - api_version=model_config.get("api_version", "2024-06-01"), - ) - - self.deployment = model_config["azure_deployment"] def __call__(self, *, query: str, response: str, **kwargs) -> dict: """Evaluate the friendliness of a response. :param query: The original user query. :param response: The response to evaluate. - :return: A dict with score, label, reason, and explanation. + :return: A dict with score, label, reason, threshold, passed, and properties. 
""" - messages = build_evaluation_messages(query, response) - - completion = self.client.chat.completions.create( - model=self.deployment, - messages=messages, + result = self.client.responses.create( + model=self.model_name, + instructions=build_evaluation_instructions(), + input=build_evaluation_input(query, response), temperature=0.0, - max_tokens=500, + max_output_tokens=500, ) - raw_result = completion.choices[0].message.content + raw_result = result.output_text if raw_result is None: - raise ValueError("No content in completion response") + raise ValueError("No content in response") return parse_evaluation_result(raw_result, self.threshold) diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py index b6d1a9195fbf..8d5abe2d9c55 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_custom_evaluator.py @@ -81,11 +81,8 @@ entry_point="answer_length_evaluator:AnswerLengthEvaluator", init_parameters={ "type": "object", - "properties": { - "config": {"type": "string"}, - "threshold": {"type": "number"} - }, - "required": ["config", "threshold"], + "properties": {}, + "required": [], }, data_schema={ "type": "object", @@ -96,7 +93,7 @@ "required": ["query", "response"], }, metrics={ - "result": EvaluatorMetric( + "score": EvaluatorMetric( type=EvaluatorMetricType.ORDINAL, desirable_direction=EvaluatorMetricDirection.INCREASE, min_value=1, @@ -140,10 +137,7 @@ "type": "azure_ai_evaluator", "name": evaluator_name, "evaluator_name": evaluator_name, - "initialization_parameters": { - "config": "example config value", - "threshold": 3, - }, + "initialization_parameters": {}, } ] @@ -215,13 +209,13 @@ print("Waiting for evaluation run to complete...") # --------------------------------------------------------------- - # 5. 
Cleanup (uncomment to delete) + # 5. Cleanup # --------------------------------------------------------------- - # print("\nCleaning up...") - # project_client.beta.evaluators.delete_version( - # name=code_evaluator.name, - # version=code_evaluator.version, - # ) - # client.evals.delete(eval_id=eval_object.id) - # print("Cleanup done.") + print("\nCleaning up...") + project_client.beta.evaluators.delete_version( + name=code_evaluator.name, + version=code_evaluator.version, + ) + client.evals.delete(eval_id=eval_object.id) + print("Cleanup done.") print("\nDone - upload, eval creation, and eval run verified successfully.") diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py index 67e168d3509a..a54443867fb3 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_eval_upload_friendly_evaluator.py @@ -7,13 +7,14 @@ """ DESCRIPTION: Given an AIProjectClient, this sample demonstrates how to: - 1. Upload a custom LLM-based evaluator (FriendlyEvaluator) with nested - folder structure (common_util/) using `evaluators.upload()`. - 2. Create an evaluation (eval) that references the uploaded evaluator. - 3. Run the evaluation with inline data and poll for results. + 1. Run the FriendlyEvaluator standalone to verify it works locally. + 2. Upload the evaluator code (with nested folder structure) using + ``evaluators.upload()``. + 3. Create an evaluation (eval) that references the uploaded evaluator. + 4. Run the evaluation with inline data and poll for results. - The FriendlyEvaluator calls Azure OpenAI to judge the friendliness of a - response and returns score, label, reason, and explanation. + The FriendlyEvaluator calls OpenAI Responses API to judge the friendliness + of a response and returns score, label, reason, and explanation. 
USAGE:
    python sample_eval_upload_friendly_evaluator.py

@@ -24,10 +25,13 @@
    Set these environment variables with your own values:
    1) FOUNDRY_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint.
-    2) FOUNDRY_MODEL_NAME - Optional. The name of the model deployment to use for evaluation.
+    2) OPENAI_API_KEY - Required. The OpenAI API key.
+    3) OPENAI_MODEL - Optional. The model to use (default: gpt-4o).
"""

import os
+import sys
+import json
import time
import random
import string
@@ -56,12 +60,49 @@
load_dotenv()

endpoint = os.environ["FOUNDRY_PROJECT_ENDPOINT"]
-model_deployment_name = os.environ.get("FOUNDRY_MODEL_NAME")
-azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
-azure_openai_api_key = os.environ["AZURE_OPENAI_API_KEY"]
+openai_api_key = os.environ["OPENAI_API_KEY"]
+openai_model = os.environ.get("OPENAI_MODEL", "gpt-4o")

-# The folder containing the FriendlyEvaluator code, including common_util/ subfolder
-local_upload_folder = str(Path(__file__).parent / "custom_evaluators" / "friendly_evaluator")
+# Add the evaluator folder to sys.path so we can import it for local testing
+evaluator_folder = str(Path(__file__).parent / "custom_evaluators" / "friendly_evaluator")
+sys.path.insert(0, evaluator_folder)
+
+from friendly_evaluator import FriendlyEvaluator  # noqa: E402
+
+# ---------------------------------------------------------------
+# 1. Run FriendlyEvaluator standalone to verify it works locally
+# ---------------------------------------------------------------
+print(f"=== Step 1: Standalone FriendlyEvaluator test (model={openai_model}) ===\n")
+
+evaluator = FriendlyEvaluator(api_key=openai_api_key, model_name=openai_model, threshold=3)
+
+test_cases = [
+    {
+        "query": "How do I reset my password?",
+        "response": "Go to settings. Click reset. Done.",
+    },
+    {
+        "query": "How do I reset my password?",
+        "response": (
+            "Great question! I'd be happy to help you reset your password. 
" + "Just head over to Settings > Security > Reset Password, and follow " + "the prompts. If you run into any trouble, feel free to ask — I'm here to help! 😊" + ), + }, + { + "query": "Can you help me with my order?", + "response": "Read the FAQ.", + }, +] + +for i, tc in enumerate(test_cases, 1): + print(f"--- Test Case {i} ---") + print(f"Query: {tc['query']}") + print(f"Response: {tc['response'][:80]}...") + result = evaluator(query=tc["query"], response=tc["response"]) + print(f"Result: {json.dumps(result, indent=2)}\n") + +print("Standalone test complete.\n") with ( DefaultAzureCredential() as credential, @@ -69,7 +110,7 @@ project_client.get_openai_client() as client, ): # --------------------------------------------------------------- - # 1. Upload evaluator code and create evaluator version + # 2. Upload evaluator code and create evaluator version # The folder structure uploaded is: # friendly_evaluator/ # friendly_evaluator.py <- entry point @@ -80,6 +121,8 @@ suffix = "".join(random.choices(string.ascii_lowercase, k=5)) evaluator_name = f"friendly_evaluator_{suffix}" + print(f"=== Step 2: Upload evaluator as '{evaluator_name}' ===\n") + evaluator_version = EvaluatorVersion( evaluator_type=EvaluatorType.CUSTOM, categories=[EvaluatorCategory.QUALITY], @@ -90,19 +133,17 @@ init_parameters={ "type": "object", "properties": { - "model_config": { - "type": "object", - "description": "Azure OpenAI configuration for the LLM judge", - "properties": { - "azure_endpoint": {"type": "string"}, - "api_version": {"type": "string"}, - "api_key": {"type": "string"}, - }, - "required": ["azure_endpoint", "api_key"], + "api_key": { + "type": "string", + "description": "OpenAI API key for the LLM judge", + }, + "model_name": { + "type": "string", + "description": "Model name to use for evaluation (e.g. 
gpt-4o)", }, "threshold": {"type": "number"}, }, - "required": ["model_config", "threshold"], + "required": ["api_key", "model_name", "threshold"], }, data_schema={ "type": "object", @@ -123,33 +164,32 @@ ), ) - print("Uploading FriendlyEvaluator (with nested common_util folder)...") friendly_evaluator = project_client.beta.evaluators.upload( name=evaluator_name, evaluator_version=evaluator_version, - folder=local_upload_folder, + folder=evaluator_folder, ) - print(f"\nEvaluator created: name={friendly_evaluator.name}, version={friendly_evaluator.version}") + print(f"Evaluator created: name={friendly_evaluator.name}, version={friendly_evaluator.version}") print(f"Evaluator ID: {friendly_evaluator.id}") pprint(friendly_evaluator) # --------------------------------------------------------------- - # 2. Create an evaluation referencing the uploaded evaluator + # 3. Create an evaluation referencing the uploaded evaluator # --------------------------------------------------------------- + print(f"\n=== Step 3: Create evaluation ===\n") + data_source_config = DataSourceConfigCustom( - { - "type": "custom", - "item_schema": { - "type": "object", - "properties": { - "query": {"type": "string"}, - "response": {"type": "string"}, - }, - "required": ["query", "response"], + type="custom", + item_schema={ + "type": "object", + "properties": { + "query": {"type": "string"}, + "response": {"type": "string"}, }, - "include_sample_schema": True, - } + "required": ["query", "response"], + }, + include_sample_schema=True, ) testing_criteria = [ @@ -158,13 +198,13 @@ "name": evaluator_name, "evaluator_name": evaluator_name, "initialization_parameters": { - "deployment_name": f"{model_deployment_name}", # provide model_config or, deployment name passed is used to construct the model_config for the evaluator. 
- "threshold": 3, + "api_key": openai_api_key, + "model_name": openai_model, + "threshold": 3, }, } ] - print("\nCreating evaluation...") eval_object = client.evals.create( name=f"Friendliness Evaluation - {suffix}", data_source_config=data_source_config, @@ -173,9 +213,10 @@ print(f"Evaluation created (id: {eval_object.id}, name: {eval_object.name})") # --------------------------------------------------------------- - # 3. Run the evaluation with inline data + # 4. Run the evaluation with inline data # --------------------------------------------------------------- - print("\nCreating evaluation run with inline data...") + print(f"\n=== Step 4: Create evaluation run ===\n") + eval_run_object = client.evals.runs.create( eval_id=eval_object.id, name=f"Friendliness Eval Run - {suffix}", @@ -218,12 +259,14 @@ pprint(eval_run_object) # --------------------------------------------------------------- - # 4. Poll for evaluation run completion + # 5. Poll for evaluation run completion # --------------------------------------------------------------- + print("\n=== Step 5: Polling for results ===\n") + while True: run = client.evals.runs.retrieve(run_id=eval_run_object.id, eval_id=eval_object.id) if run.status in ("completed", "failed"): - print(f"\nEvaluation run finished with status: {run.status}") + print(f"Evaluation run finished with status: {run.status}") output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id)) pprint(output_items) print(f"\nEvaluation run Report URL: {run.report_url}") @@ -232,13 +275,13 @@ print("Waiting for evaluation run to complete...") # --------------------------------------------------------------- - # 5. Cleanup (uncomment to delete) + # 6. 
Cleanup # --------------------------------------------------------------- - # print("\nCleaning up...") - # project_client.beta.evaluators.delete_version( - # name=friendly_evaluator.name, - # version=friendly_evaluator.version, - # ) - # client.evals.delete(eval_id=eval_object.id) - # print("Cleanup done.") - print("\nDone - FriendlyEvaluator upload, eval creation, and eval run verified successfully.") + print("\nCleaning up...") + project_client.beta.evaluators.delete_version( + name=friendly_evaluator.name, + version=friendly_evaluator.version, + ) + client.evals.delete(eval_id=eval_object.id) + print("Cleanup done.") + print("\nDone - FriendlyEvaluator standalone test, upload, eval creation, and eval run verified successfully.")