Skip to content

Commit 9f1443c

Browse files
committed
Implements background searching, so far sans UI notification of what's going on.
1 parent 626b69d commit 9f1443c

File tree

1 file changed

+60
-20
lines changed

1 file changed

+60
-20
lines changed

app.py

Lines changed: 60 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -454,19 +454,22 @@ async def get_search_results(searches):
454454
else:
455455
allresults += results[:3]
456456
# Remove extraneous fields
457-
proparray = ["richFacts", "isNavigational", "isFamilyFriendly", "displayUrl", "searchTags", "noCache", "cachedPageUrl", "datePublishedDisplayText", "datePublished", "id", "primaryImageOfPage", "thumbnailUrl"]
457+
proparray = ["dateLastCrawled", "language", "richFacts", "isNavigational", "isFamilyFriendly", "displayUrl", "searchTags", "noCache", "cachedPageUrl", "datePublishedDisplayText", "datePublished", "id", "primaryImageOfPage", "thumbnailUrl"]
458458
for obj in allresults:
459459
for prop in proparray:
460460
if prop in obj:
461461
del obj[prop]
462462
return allresults
463463

464-
async def identify_searches(request_body, request_headers):
465-
system_preamble = "The Original System Prompt that follows is your primary objective, but right now for this chat, you just need to provide a simple list of a few searches you need me to perform in order to fully research and document your suggestion for the feedback and URL provided, as specified in the Original System Prompt. If you can answer with full confidence without any searches, then reply with simply 'No searches required.'. Otherwise, send a comma delimited array of searches with one or several searches you would like me to perform for you to give you all the background data and links you need. Do nothing else but provide the array of search strings or the 'No searches required.' message.\nOriginal System Prompt:\n"
464+
async def identify_searches(request_body, request_headers, Summaries = None):
465+
if Summaries is None:
466+
system_preamble = "The Original System Prompt that follows is your primary objective, but for this chat, you just need to provide a list of a few searches you need me to perform in order to fully research and document your suggestion for the feedback and URL provided, as specified in the Original System Prompt. If you can answer with full confidence without any searches, then reply with simply 'No searches required.'. Otherwise, send a comma delimited array of searches with one or several searches you would like me to perform for you to give you all the background data and links you need. Do nothing else but provide the array of search strings or the 'No searches required.' message.\nOriginal System Prompt:\n"
467+
else:
468+
system_preamble = "The Original System Prompt that follows is your primary objective, but for this chat, you just need to provide a list of a few additional searches you need me to perform in order to fully research and document your suggestion for the feedback and URL provided, as specified in the Original System Prompt. If you can answer with full confidence without any searches, then reply with simply 'No searches required.'. Otherwise, send a comma delimited array with one or several new searches you would like me to perform for you to give you all the background data and links you need. Do nothing else but provide the array of search strings or the 'No searches required.' message. Existing gathered background data that you determined was insufficient so far to answer follows.\n\nExisting Background Data:\n\n" + json.dumps(Summaries, indent=4) + "\n\nOriginal System Prompt:\n"
466469
searches = await send_private_chat(request_body, request_headers, system_preamble)
467470
if isinstance(searches, str):
468471
if searches == "No searches required.":
469-
return ""
472+
return None
470473
else:
471474
if searches[0] != "[":
472475
searches = "[" + searches
@@ -481,31 +484,68 @@ async def get_urls_to_browse(request_body, request_headers, searches):
481484
return "Search error."
482485
else:
483486
strsearchresults = json.dumps(searchresults, indent=4)
484-
system_prompt = "You are tasked with helping content developers resolve customer feedback on their content on learn.microsoft.com. Right now, you've searched and identified the following list of potential URLs for further research. Return nothing except an array of strings, with each string being a URL we should browse to research further, so we can fully address the feedback and document our sources. Here is the list of possible sites we can browse:\n\n" + strsearchresults
487+
system_prompt = "You are tasked with helping content developers resolve customer feedback on their content on learn.microsoft.com. Right now, you've searched and identified the following list of potential URLs for further research. Return nothing except an array of strings, with each string being a URL we should browse to research further, so we can fully address the feedback and document our sources. Prefer Microsoft sources but use external sources too if necessary to answer well and provide references for everything you state. Here is the list of possible sites we can browse:\n\n" + strsearchresults
485488
URLsToBrowse = await send_private_chat(request_body, request_headers, None, system_prompt)
486489
return URLsToBrowse
487490

488-
async def search_and_add_background_references(request_body, request_headers):
489-
searches = await identify_searches(request_body, request_headers)
490-
URLsToBrowse = await get_urls_to_browse(request_body, request_headers, searches)
491-
print(f"Web pages to browse: {URLsToBrowse}")
492-
493-
491+
async def get_article_summaries(request_body, request_headers, URLsToBrowse):
    """Ask the chat model to summarize each URL listed in *URLsToBrowse*.

    Args:
        request_body: Chat request body, forwarded unchanged to
            ``send_private_chat``.
        request_headers: Request headers, forwarded unchanged to
            ``send_private_chat``.
        URLsToBrowse: JSON-encoded array of URL strings.

    Returns:
        A list of ``{"URL": ..., "summary": ...}`` dicts, one per URL in
        input order, or ``None`` when the array is empty (callers test for
        ``None``, so that contract is kept).

    Raises:
        json.JSONDecodeError: If *URLsToBrowse* is not valid JSON.
        TypeError: If a URL is not a string or a summary is not
            JSON-serializable.
    """
    Summaries = None
    for URL in json.loads(URLsToBrowse):
        system_prompt = "You are tasked with helping content developers resolve customer feedback on their content on learn.microsoft.com. Right now, you've identified the following URL for further research: " + URL + ". Your task now is to provide a summary of relevant content on the page that will help us address the feedback on the URL provided by the user and document current sources. Return nothing except your summary of the key points and any important quotes the content on the page in a single string.\n\n"
        summary = await send_private_chat(request_body, request_headers, None, system_prompt)
        # Serialize a real dict instead of splicing the raw URL into JSON
        # text: a URL containing a quote, backslash or control character
        # previously produced invalid (or injected) JSON. The dumps/loads
        # round trip is kept so the summary is still normalized to plain
        # JSON types and non-serializable values still fail here.
        record = json.loads(json.dumps({"URL": URL, "summary": summary}))
        if Summaries is None:
            Summaries = [record]
        else:
            Summaries.append(record)
    return Summaries
503+
504+
async def is_background_info_sufficient(request_body, request_headers, Summaries):
    """Ask the chat model whether the gathered summaries are enough.

    Args:
        request_body: Chat request body, forwarded unchanged to
            ``send_private_chat``.
        request_headers: Request headers, forwarded unchanged to
            ``send_private_chat``.
        Summaries: JSON-serializable collection of summaries gathered so
            far; it is dumped with ``indent=4`` and appended to the prompt.

    Returns:
        ``False`` when the model replies that more information is needed,
        ``True`` for any other reply.
    """
    strSummaries = json.dumps(Summaries, indent=4)
    system_prompt = "You are tasked with helping content developers resolve customer feedback on their content on learn.microsoft.com. Right now, you've summarized the content of the URLs you've identified for further research. Review the summaries below and determine if you have enough background information to fully address the feedback on the URL provided by the user and document current sources. If you need more information, reply with 'More information needed.' If you have enough information, reply with 'Sufficient information.'\n\n" + strSummaries
    response = await send_private_chat(request_body, request_headers, None, system_prompt)
    if not isinstance(response, str):
        # Same outcome as the original comparison: anything that is not the
        # "more information" sentinel counts as sufficient.
        return True
    # Compare a normalized form so that trailing whitespace, surrounding
    # quotes, a missing final period or different casing in the model's
    # reply is not silently misread as "sufficient".
    verdict = response.strip().strip("'\"").strip().rstrip(".").strip().casefold()
    return verdict != "more information needed"
494512

513+
async def search_and_add_background_references(request_body, request_headers, max_rounds=5):
    """Research background material for a chat request and build a preamble.

    Each round asks the model which searches it wants, picks URLs from the
    search results, summarizes those URLs, and then asks the model whether
    the accumulated summaries are sufficient.

    Args:
        request_body: Chat request body, forwarded to the helper calls.
        request_headers: Request headers, forwarded to the helper calls.
        max_rounds: Upper bound on search/summarize rounds. The loop is
            driven by model replies, so without a cap it could run (and
            bill) indefinitely. Values below 1 perform no research.

    Returns:
        ``None`` when no background references were gathered,
        ``"Search error."`` when URL selection reports a search failure,
        otherwise a system-message preamble embedding the summaries as JSON.
    """
    Summaries = None
    for _ in range(max_rounds):
        # Passing Summaries=None selects the "initial searches" prompt in
        # identify_searches; a list selects the "additional searches" one.
        searches = await identify_searches(request_body, request_headers, Summaries)
        if searches is None:
            # The model wants no (further) searches. Stop here, but keep
            # whatever earlier rounds already collected instead of
            # discarding it.
            break

        URLsToBrowse = await get_urls_to_browse(request_body, request_headers, searches)
        if URLsToBrowse == "Search error.":
            return "Search error."

        newSummaries = await get_article_summaries(request_body, request_headers, URLsToBrowse)
        if newSummaries:
            # get_article_summaries returns None when there were no URLs;
            # only extend when something was actually produced.
            Summaries = (Summaries or []) + newSummaries

        if await is_background_info_sufficient(request_body, request_headers, Summaries):
            break

    if not Summaries:
        # Nothing was gathered: report "no references" rather than a
        # preamble whose reference list is "null".
        return None

    return "Use the following background references to thoroughly document your answer for the customer in the Primary System Message at the end.\n\nBackground References:\n\n" + json.dumps(Summaries, indent=4) + "\n\nPrimary System Message:"
502541

503542
async def conversation_internal(request_body, request_headers):
504543
try:
505544
system_preamble = await search_and_add_background_references(request_body, request_headers)
506-
507-
# Execute the original chat request, altering the system message
508-
result = await stream_chat_request(request_body, request_headers, system_preamble)
545+
if system_preamble != "Search error.":
546+
result = await stream_chat_request(request_body, request_headers, system_preamble)
547+
else:
548+
result = await stream_chat_request(request_body, request_headers, "NOTE: An error occurred while searching for background information. Please inform the user that you were unable to search to validate results, but do your best to answer regardless.\n\nPrimary System Message:\n\n")
509549
response = await make_response(format_as_ndjson(result))
510550
response.timeout = None
511551
response.mimetype = "application/json-lines"

0 commit comments

Comments
 (0)