Implements background searching, so far sans UI notification of what's going on.

jonburchel · jonburchel · commit 9f1443c402a8 · 2024-07-26T18:51:17.000-04:00
diff --git a/app.py b/app.py
@@ -454,19 +454,22 @@ async def get_search_results(searches):
                 else:
                     allresults += results[:3]
         # Remove extraneous fields
-                proparray = ["richFacts", "isNavigational", "isFamilyFriendly", "displayUrl", "searchTags", "noCache", "cachedPageUrl", "datePublishedDisplayText", "datePublished", "id", "primaryImageOfPage", "thumbnailUrl"]
+                proparray = ["dateLastCrawled", "language", "richFacts", "isNavigational", "isFamilyFriendly", "displayUrl", "searchTags", "noCache", "cachedPageUrl", "datePublishedDisplayText", "datePublished", "id", "primaryImageOfPage", "thumbnailUrl"]
                 for obj in allresults:
                     for prop in proparray:
                         if prop in obj:
                             del obj[prop]
                 return allresults
 
-async def identify_searches(request_body, request_headers):
-        system_preamble = "The Original System Prompt that follows is your primary objective, but right now for this chat, you just need to provide a simple list of a few searches you need me to perform in order to fully research and document your suggestion for the feedback and URL provided, as specified in the Original System Prompt. If you can answer with full confidence without any searches, then reply with simply 'No searches required.'. Otherwise, send a comma delimited array of searches with one or several searches you would like me to perform for you to give you all the background data and links you need. Do nothing else but provide the array of search strings or the 'No searches required.' message.\nOriginal System Prompt:\n"
+async def identify_searches(request_body, request_headers, Summaries = None):
+        if Summaries is None:
+            system_preamble = "The Original System Prompt that follows is your primary objective, but for this chat, you just need to provide a list of a few searches you need me to perform in order to fully research and document your suggestion for the feedback and URL provided, as specified in the Original System Prompt. If you can answer with full confidence without any searches, then reply with simply 'No searches required.'. Otherwise, send a comma delimited array of searches with one or several searches you would like me to perform for you to give you all the background data and links you need. Do nothing else but provide the array of search strings or the 'No searches required.' message.\nOriginal System Prompt:\n"
+        else:
+            system_preamble = "The Original System Prompt that follows is your primary objective, but for this chat, you just need to provide a list of a few additional searches you need me to perform in order to fully research and document your suggestion for the feedback and URL provided, as specified in the Original System Prompt. If you can answer with full confidence without any searches, then reply with simply 'No searches required.'. Otherwise, send a comma delimited array with one or several new searches you would like me to perform for you to give you all the background data and links you need. Do nothing else but provide the array of search strings or the 'No searches required.' message. Existing gathered background data that you determined was insufficient so far to answer follows.\n\nExisting Background Data:\n\n" + json.dumps(Summaries, indent=4) + "\n\nOriginal System Prompt:\n"
         searches = await send_private_chat(request_body, request_headers, system_preamble)
         if isinstance(searches, str):
             if searches == "No searches required.": 
-                return ""
+                return None
             else:
                 if searches[0] != "[":
                     searches = "[" + searches
@@ -481,31 +484,68 @@ async def get_urls_to_browse(request_body, request_headers, searches):
             return "Search error."
         else:
             strsearchresults = json.dumps(searchresults, indent=4)
-            system_prompt = "You are tasked with helping content developers resolve customer feedback on their content on learn.microsoft.com. Right now, you've searched and identified the following list of potential URLs for further research. Return nothing except an array of strings, with each string being a URL we should browse to research further, so we can fully address the feedback and document our sources. Here is the list of possible sites we can browse:\n\n" + strsearchresults
+            system_prompt = "You are tasked with helping content developers resolve customer feedback on their content on learn.microsoft.com. Right now, you've searched and identified the following list of potential URLs for further research. Return nothing except an array of strings, with each string being a URL we should browse to research further, so we can fully address the feedback and document our sources. Prefer Microsoft sources but use external sources too if necessary to answer well and provide references for everything you state. Here is the list of possible sites we can browse:\n\n" + strsearchresults
             URLsToBrowse = await send_private_chat(request_body, request_headers, None, system_prompt)
             return URLsToBrowse
         
-async def search_and_add_background_references(request_body, request_headers):
-        searches = await identify_searches(request_body, request_headers)
-        URLsToBrowse = await get_urls_to_browse(request_body, request_headers, searches)
-        print(f"Web pages to browse: {URLsToBrowse}")
-
-
+async def get_article_summaries(request_body, request_headers, URLsToBrowse):
+        Summaries = None
+        URLsToBrowse = json.loads(URLsToBrowse)
+        for URL in URLsToBrowse:
+            system_prompt = "You are tasked with helping content developers resolve customer feedback on their content on learn.microsoft.com. Right now, you've identified the following URL for further research: " + URL + ". Your task now is to provide a summary of relevant content on the page that will help us address the feedback on the URL provided by the user and document current sources. Return nothing except your summary of the key points and any important quotes the content on the page in a single string.\n\n"
+            summary = await send_private_chat(request_body, request_headers, None, system_prompt)
+            summary = json.loads("{\"URL\" : \"" + URL + "\",\n\"summary\" : " + json.dumps(summary) + "}")
+            if Summaries is None:
+                Summaries = [summary]
+            else:
+                Summaries.append(summary)
+        return Summaries
+
+async def is_background_info_sufficient(request_body, request_headers, Summaries):
+        strSummaries = json.dumps(Summaries, indent=4)
+        system_prompt = "You are tasked with helping content developers resolve customer feedback on their content on learn.microsoft.com. Right now, you've summarized the content of the URLs you've identified for further research. Review the summaries below and determine if you have enough background information to fully address the feedback on the URL provided by the user and document current sources. If you need more information, reply with 'More information needed.' If you have enough information, reply with 'Sufficient information.'\n\n" + strSummaries
+        response = await send_private_chat(request_body, request_headers, None, system_prompt)
+        if response == "More information needed.": 
+            return False
+        else:
+            return True
         
+async def search_and_add_background_references(request_body, request_headers):
+        NeedsMoreSummaries = True
+        Summaries = None
+        while NeedsMoreSummaries:
+            
+            if Summaries is None:
+                searches = await identify_searches(request_body, request_headers)
+            else:
+                searches = await identify_searches(request_body, request_headers, Summaries)
+            
+            if searches is None:
+                return None
+            
+            URLsToBrowse = await get_urls_to_browse(request_body, request_headers, searches)
+            if URLsToBrowse == "Search error.": 
+                return "Search error."       
+            
+            if (Summaries is None):
+                Summaries = await get_article_summaries(request_body, request_headers, URLsToBrowse)
+            else:
+                newSummaries = await get_article_summaries(request_body, request_headers, URLsToBrowse)
+                Summaries += newSummaries
+            
+            AreWeDone = await is_background_info_sufficient(request_body, request_headers, Summaries)
+            if AreWeDone:
+                NeedsMoreSummaries = False
 
-        # And so on, back and forth as much as necessary, calling search API as necessary,
-        # in a loop, until chat is confident it has all the necessary background details 
-        # compiled to fully answer and document ground truth for its responses.
-
-        # Return injected relevant summaries, quotes, and source URLs for background information to the user question, if chat deems search background is necessary.
-        return "Be sure to slip in a reference to https://www.foo.bar in your response!"
+        return "Use the following background references to thoroughly document your answer for the customer in the Primary System Message at the end.\n\nBackground References:\n\n" + json.dumps(Summaries, indent=4) + "\n\nPrimary System Message:"
 
 async def conversation_internal(request_body, request_headers):
     try:
         system_preamble = await search_and_add_background_references(request_body, request_headers)
-
-        # Execute the original chat request, altering the system message
-        result = await stream_chat_request(request_body, request_headers, system_preamble)
+        if system_preamble != "Search error.":
+            result = await stream_chat_request(request_body, request_headers, system_preamble)
+        else:
+            result = await stream_chat_request(request_body, request_headers, "NOTE: An error occurred while searching for background information. Please inform the user that you were unable to search to validate results, but do your best to answer regardless.\n\nPrimary System Message:\n\n")
         response = await make_response(format_as_ndjson(result))
         response.timeout = None
         response.mimetype = "application/json-lines"