From 60f2381f78762c42fd3360671282a6addd3d4293 Mon Sep 17 00:00:00 2001 From: Pablo Marin Date: Tue, 17 Oct 2023 19:41:14 +0000 Subject: [PATCH] error note on notebook 4 --- 01-Load-Data-ACogSearch.ipynb | 17 +- 02-LoadCSVOneToMany-ACogSearch.ipynb | 108 ++++++------ 03-Quering-AOpenAI.ipynb | 246 ++++++++++----------------- 04-Complex-Docs.ipynb | 65 ++++--- credentials.env | 4 +- 5 files changed, 189 insertions(+), 251 deletions(-) diff --git a/01-Load-Data-ACogSearch.ipynb b/01-Load-Data-ACogSearch.ipynb index 087ecfc4..bf63eaf1 100644 --- a/01-Load-Data-ACogSearch.ipynb +++ b/01-Load-Data-ACogSearch.ipynb @@ -582,22 +582,11 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "200\n", - "Status: inProgress\n", - "Items Processed: 100\n", - "True\n" - ] - } - ], + "outputs": [], "source": [ "# Optionally, get indexer status to confirm that it's running\n", "r = requests.get(os.environ['AZURE_SEARCH_ENDPOINT'] + \"/indexers/\" + indexer_name +\n", @@ -643,7 +632,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 28, "metadata": {}, "outputs": [ { diff --git a/02-LoadCSVOneToMany-ACogSearch.ipynb b/02-LoadCSVOneToMany-ACogSearch.ipynb index c52ce377..90e32558 100644 --- a/02-LoadCSVOneToMany-ACogSearch.ipynb +++ b/02-LoadCSVOneToMany-ACogSearch.ipynb @@ -76,7 +76,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "204\n", + "201\n", "True\n" ] } @@ -120,19 +120,19 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "id": "2fbbbd0d-3015-4601-9ef1-7008ad168167", "metadata": {}, "outputs": [], "source": [ "#Download the csv files to disk and inspect using pandas\n", "import pandas as pd\n", - "remote_file_path = \"https://demodatasetsp.blob.core.windows.net/cord19/metadata.csv\"" + "remote_file_path = \"https://demodatasetsp.blob.core.windows.net/cord19/metadata/metadata.csv\"" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "id": "aaac918a-8859-45f5-9519-2cf56bfded88", "metadata": {}, "outputs": [ @@ -148,72 +148,72 @@ "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 cord_uidsource_xtitleabstractauthorsurlcord_uidsource_xtitleabstractauthorsurl
0ug7v899jPMCClinical features of culture-p...OBJECTIVE: This retrospective ...Madani, Tariq A; Al-Ghamdi, Ai...https://www.ncbi.nlm.nih.gov/pmc/articles/PMC35282/0ug7v899jPMCClinical features of culture-p...OBJECTIVE: This retrospective ...Madani, Tariq A; Al-Ghamdi, Ai...https://www.ncbi.nlm.nih.gov/pmc/articles/PMC35282/
102tnwd4mPMCNitric oxide: a pro-inflammato...Inflammatory diseases of the r...Vliet, Albert van der; Eiseric...https://www.ncbi.nlm.nih.gov/pmc/articles/PMC59543/102tnwd4mPMCNitric oxide: a pro-inflammato...Inflammatory diseases of the r...Vliet, Albert van der; Eiseric...https://www.ncbi.nlm.nih.gov/pmc/articles/PMC59543/
2ejv2xln0PMCSurfactant protein-D and pulmo...Surfactant protein-D (SP-D) pa...Crouch, Erika C...https://www.ncbi.nlm.nih.gov/pmc/articles/PMC59549/2ejv2xln0PMCSurfactant protein-D and pulmo...Surfactant protein-D (SP-D) pa...Crouch, Erika C...https://www.ncbi.nlm.nih.gov/pmc/articles/PMC59549/
32b73a28nPMCRole of endothelin-1 in lung d...Endothelin-1 (ET-1) is a 21 am...Fagan, Karen A; McMurtry, Ivan...https://www.ncbi.nlm.nih.gov/pmc/articles/PMC59574/32b73a28nPMCRole of endothelin-1 in lung d...Endothelin-1 (ET-1) is a 21 am...Fagan, Karen A; McMurtry, Ivan...https://www.ncbi.nlm.nih.gov/pmc/articles/PMC59574/
49785vg6dPMCGene expression in epithelial ...Respiratory syncytial virus (R...Domachowske, Joseph B; Bonvill...https://www.ncbi.nlm.nih.gov/pmc/articles/PMC59580/49785vg6dPMCGene expression in epithelial ...Respiratory syncytial virus (R...Domachowske, Joseph B; Bonvill...https://www.ncbi.nlm.nih.gov/pmc/articles/PMC59580/
\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 6, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -248,7 +248,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "id": "b46cfa90-28b4-4602-b6ff-743a3407fd72", "metadata": {}, "outputs": [ @@ -336,7 +336,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "id": "5284b80c-9ba6-49d6-8109-5bfdbaa6ddc5", "metadata": {}, "outputs": [ @@ -403,7 +403,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "id": "b87b8ebd-8091-43b6-9124-cc17021cfb78", "metadata": {}, "outputs": [ @@ -469,7 +469,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 17, "id": "6132c041-7213-410e-a206-1a8c7385128e", "metadata": {}, "outputs": [ @@ -479,7 +479,7 @@ "text": [ "200\n", "Status: inProgress\n", - "Items Processed: 20000\n", + "Items Processed: 45000\n", "True\n" ] } @@ -513,7 +513,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 18, "id": "ec359823-3b9f-4b7f-af38-c3f2f916d5fa", "metadata": {}, "outputs": [ diff --git a/03-Quering-AOpenAI.ipynb b/03-Quering-AOpenAI.ipynb index 0fff40a1..bb43ad05 100644 --- a/03-Quering-AOpenAI.ipynb +++ b/03-Quering-AOpenAI.ipynb @@ -131,7 +131,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 4, "id": "b9b53c14-19bd-451f-aa43-7ad27ccfeead", "metadata": {}, "outputs": [], @@ -152,7 +152,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 5, "id": "faf2e30f-e71f-4533-ab52-27d048b80a89", "metadata": {}, "outputs": [ @@ -161,9 +161,9 @@ "output_type": "stream", "text": [ "200\n", - "Index: cogsrch-index-files Results Found: 9823, Results Returned: 10\n", + "Index: cogsrch-index-files Results Found: 470, Results Returned: 10\n", "200\n", - "Index: cogsrch-index-csv Results Found: 78762, Results Returned: 10\n" + "Index: cogsrch-index-csv Results Found: 42870, Results Returned: 10\n" ] } ], @@ -205,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 6, "id": "9e938337-602d-4b61-8141-b8c92a5d91da", "metadata": {}, "outputs": [ @@ -242,30 +242,6 @@ "metadata": {}, "output_type": "display_data" }, - { - "data": { - "text/html": [ - "
0610113v1.pdf - score: 3.12
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "The last value establishes a balance between exploration and exploitation, tending to exploration (but without abandoning exploitation altogether)." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "data": { "text/html": [ @@ -290,30 +266,6 @@ "metadata": {}, "output_type": "display_data" }, - { - "data": { - "text/html": [ - "
MC2pt.doc - score: 2.88
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "‘Exploration’ is an appropriate term for the process, because the numerical simulation of systems allows one to 6 explore the system’s behaviour under a wide range of parameter settings and initial conditions. The heuristic value of this experimentation cannot be overestimated." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "data": { "text/html": [ @@ -341,55 +293,7 @@ { "data": { "text/html": [ - "
0511053v1.pdf - score: 2.69
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "This method differentiates between the two forms of exploration mentioned above, however both forms choose the output interface uniformly, although the valid interfaces for Controlled Exploration are slightly constrained for optimization." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
rl-survey.dvi - score: 2.64
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Exploitation versus Exploration: The Single-State CaseOne major di�erence between reinforcement learning and supervised learning is that areinforcement-learner must explicitly explore its environment. In order to highlight theproblems of exploration, we treat a very simple case in this section." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
Lecture Notes in Computer Science: - score: 2.63
" + "
Structure-based exploration and exploitation of the S4 subsite of norovirus 3CL protease in the design of potent and permeable inhibitors. - score: 2.22
" ], "text/plain": [ "" @@ -401,7 +305,7 @@ { "data": { "text/html": [ - "the options can be numerous, but we can name a few obvious ones that are page 15 of 19 also clearly independent between them: experimentation with more exploration-exploitation trade-offs or alternative rl parameters, experimentation with the learning parameters or the input-output repre- sentation of the neural network, experimentation …" + "Structure-based exploration and exploitation of the S4 subsite of norovirus 3CL protease in the design of potent and permeable inhibitors.. Human noroviruses are the primary cause of epidemic and sporadic acute gastroenteritis." ], "text/plain": [ "" @@ -413,7 +317,7 @@ { "data": { "text/html": [ - "
0511004v1.pdf - score: 2.4
" + "
Dangerous liaisons: molecular basis for a syndemic relationship between Kaposi’s sarcoma and P. falciparum malaria - score: 1.8
" ], "text/plain": [ "" @@ -425,7 +329,7 @@ { "data": { "text/html": [ - "The exploita- tion phase can sometimes be “delegated” to some local optimization procedure, whether called as a mutation operator, or systematically applied to all newborn individuals, moving them to the nearest local optimum. In the latter case, the resulting hybrid algorithm is called a memetic al- gorithm." + "Against this background, ongoing studies are rapidly constructing a fascinating new paradigm in which the major host receptors that control parasite invasion (Basigin/CD147) and cyto-adherence (CD36) are, surprisingly, also important targets for exploitation by KSHV." ], "text/plain": [ "" @@ -437,7 +341,7 @@ { "data": { "text/html": [ - "
0604010v2.pdf - score: 2.39
" + "
arXiv:cs/0006012v1 [cs.CL] 5 Jun 2000 - score: 1.76
" ], "text/plain": [ "" @@ -449,7 +353,10 @@ { "data": { "text/html": [ - "From a statistical viewpoint, the exploration bonus corresponds to a model of a non-stationary world, where un- certainty about past experiences increases with elapsed time elapsed. 12 In general, the conditions defined in Sec. 3 require maintaining some type of belief, int the form of a distribution, over the expected return of actions." + "At this point it seems plausible that every dedu\r", + "tionthat has been made \r", + "an be attributed to some individual.1 We have just presented two motivating reasons for produ\r", + "ing systems that are ableto understand human languages, in the guise of a single reason." ], "text/plain": [ "" @@ -461,7 +368,7 @@ { "data": { "text/html": [ - "
arXiv:cs/0604010v1 [cs.AI] 5 Apr 2006 - score: 2.31
" + "
GENSTYLE: exploration and analysis of DNA sequences with genomic signature - score: 1.65
" ], "text/plain": [ "" @@ -473,7 +380,7 @@ { "data": { "text/html": [ - "From a statistical viewpoint, the 10 exploration bonus corresponds to a model of a non-stationary world, where un- certainty about past experiences increases with elapsed time elapsed." + "tools for the exploration and analysis of signatures allow (i) identification of the origin of dna segments (detection of rare species or species for which technical problems prevent fast characterization, such as micro-organisms with slow growth), (ii) analysis of the homogeneity of a genome and isolation of areas with novel functionality …" ], "text/plain": [ "" @@ -485,7 +392,7 @@ { "data": { "text/html": [ - "
Structure-based exploration and exploitation of the S4 subsite of norovirus 3CL protease in the design of potent and permeable inhibitors. - score: 2.22
" + "
Laparoscopic common bile duct exploration. Lessons learned after 200 cases. - score: 1.65
" ], "text/plain": [ "" @@ -497,7 +404,7 @@ { "data": { "text/html": [ - "Structure-based exploration and exploitation of the S4 subsite of norovirus 3CL protease in the design of potent and permeable inhibitors.. Human noroviruses are the primary cause of epidemic and sporadic acute gastroenteritis." + "INTRODUCTION Laparoscopic common bile duct exploration (LCBDE) is a reliable, reproducible and cost-effective treatment for common bile duct stones. Several techniques have been described for choledochotomy closure. AIMS To present our experience and the lessons learned in more than 200 cases of LCBDE." ], "text/plain": [ "" @@ -509,7 +416,7 @@ { "data": { "text/html": [ - "
0503011v1.pdf - score: 2.22
" + "
Is contralateral exploration justified in endoscopic total extraperitoneal repair of clinical unilateral groin hernias - A Prospective cohort study. - score: 1.63
" ], "text/plain": [ "" @@ -521,7 +428,7 @@ { "data": { "text/html": [ - "Artificially promoting the rank of new pages can potentially accelerate this 5 exploitation loss exploration with rank promotion 0 without rank promotion V is it r at e benefit Time l Figure 2: Exploration/exploitation tradeoff. process." + "In contrast to the high incidence (6/46, 13%) of CMIH in the non-exploration cohort, there was only one metachronous occurrence (1/68, 1.4%) after negative contralateral exploration at a median follow-up of longer than 3 yrs (p = 0.02)." ], "text/plain": [ "" @@ -533,7 +440,7 @@ { "data": { "text/html": [ - "
0602053v1.pdf - score: 1.92
" + "
0012003v1.pdf - score: 1.57
" ], "text/plain": [ "" @@ -545,7 +452,7 @@ { "data": { "text/html": [ - "The minimum exploration rate g(Aj) is defined in such a way that the cumulative regret caused by the variance due to exploring at rate g(Aj) over the remaining rounds will almost surely be less than the negative regret already stored in Aj." + "an essential difference between descartes and a straightforward reading of quantum mechanics is that in quantum mechanics the observing, thinking individual and the physical world are both fundamental poles that exist in relation to one another.1 quantum mechanics does not assume an independently existing physical world beyond what it allows an …" ], "text/plain": [ "" @@ -557,7 +464,7 @@ { "data": { "text/html": [ - "
The importance and timing of optic canal exploration and decompression during endoscopic endonasal resection of tuberculum sella and planum sphenoidale meningiomas. - score: 1.87
" + "
A simulator-based resident curriculum for laparoscopic common bile duct exploration. - score: 1.54
" ], "text/plain": [ "" @@ -569,7 +476,7 @@ { "data": { "text/html": [ - "CONCLUSION Exploration and decompression of the OC are feasible, safe, and important to optimize visual outcome and to minimize recurrence in planum sphenoidale and tuberculum sella meningiomas resected endonasally." + "BACKGROUND Laparoscopic common bile duct exploration (LCBDE) remains an underused treatment for choledocholithiasis, likely in part because of a lack of exposure to the procedure during surgery residency. In this study, we implemented a resident LCBDE curriculum using a previously validated procedural simulator." ], "text/plain": [ "" @@ -581,7 +488,7 @@ { "data": { "text/html": [ - "
Girls gone wild: Social isolation induces hyperactivity and exploration in aged female mice - score: 1.86
" + "
Transcystic approach to laparoscopic common bile duct exploration. - score: 1.51
" ], "text/plain": [ "" @@ -593,7 +500,7 @@ { "data": { "text/html": [ - "Rather, isolation increased hyperactivity and exploration, and reduced anxiety-like behavior in the open field and elevated plus maze, findings that have been similarly observed in young female and male mice following early-life isolation." + "The choledochoscope was inserted into the gallbladder through the small opening in the fundus of the gallbladder extracorporeally and was advanced toward the common bile duct via the cystic duct under the guidance of both laparoscopic imaging and endoscopic imaging." ], "text/plain": [ "" @@ -605,7 +512,7 @@ { "data": { "text/html": [ - "
Dangerous liaisons: molecular basis for a syndemic relationship between Kaposi’s sarcoma and P. falciparum malaria - score: 1.8
" + "
Microsoft Word - constraint1.doc - score: 1.48
" ], "text/plain": [ "" @@ -617,7 +524,7 @@ { "data": { "text/html": [ - "Against this background, ongoing studies are rapidly constructing a fascinating new paradigm in which the major host receptors that control parasite invasion (Basigin/CD147) and cyto-adherence (CD36) are, surprisingly, also important targets for exploitation by KSHV." + "Each of those entities is programmed in a different module: 1. model, sets up the structure of the model, that is, it gives the environment of the simulation: range of parameters, initialisations, alternative choices and basic (backward chaining) rules for calculations. 2. prover, generates the dynamics of the simulation." ], "text/plain": [ "" @@ -629,7 +536,7 @@ { "data": { "text/html": [ - "
Walking the dog: exploration of the contact networks between dogs in a community. - score: 1.68
" + "
Laparoscopic transcystic bile duct exploration: the treatment of first choice for common bile duct stones. - score: 1.44
" ], "text/plain": [ "" @@ -641,7 +548,7 @@ { "data": { "text/html": [ - "Despite identification of subgroups of households and locations, we demonstrated high connectivity between dog-owning households, with minimum path lengths of two 'steps' (household-area-household, 74%) or four 'steps' (via two areas, 26%)..\u0000" + "RESULTS There was no difference in age (49 +/- 15 vs. 57 +/- 19, p = 0.7), sex (79% vs. 82% females, p = 0.6), and ASA grade (1.9 +/- 1 vs. 1.8 +/- 1, p = 0.7)." ], "text/plain": [ "" @@ -653,7 +560,7 @@ { "data": { "text/html": [ - "
GENSTYLE: exploration and analysis of DNA sequences with genomic signature - score: 1.65
" + "
arXiv:cs/0003057v1 [cs.LO] 13 Mar 2000 - score: 1.33
" ], "text/plain": [ "" @@ -665,7 +572,7 @@ { "data": { "text/html": [ - "tools for the exploration and analysis of signatures allow (i) identification of the origin of dna segments (detection of rare species or species for which technical problems prevent fast characterization, such as micro-organisms with slow growth), (ii) analysis of the homogeneity of a genome and isolation of areas with novel functionality …" + "It is aimed at allowing the exploration of knowledge bases. It is well-suited for debugging large knowledge bases, by exploiting the modularity of such designs. The package uses XSB as a pre-processing phase, where the well-founded semantics of a program, with respect to a given query, is computed." ], "text/plain": [ "" @@ -677,7 +584,7 @@ { "data": { "text/html": [ - "
Laparoscopic common bile duct exploration. Lessons learned after 200 cases. - score: 1.65
" + "
arXiv:cs/0004001v1 [cs.AI] 3 Apr 2000 - score: 1.24
" ], "text/plain": [ "" @@ -689,7 +596,7 @@ { "data": { "text/html": [ - "INTRODUCTION Laparoscopic common bile duct exploration (LCBDE) is a reliable, reproducible and cost-effective treatment for common bile duct stones. Several techniques have been described for choledochotomy closure. AIMS To present our experience and the lessons learned in more than 200 cases of LCBDE." + "In the following, a solution is always meant in this formal sense. 1 INTRODUCTION 4 the environment and updates its internal state. Then the next cycle follows. It operates according to some function p. We split the input xk into a regular part x′k and a credit ck, often called reinforcement feedback." ], "text/plain": [ "" @@ -701,7 +608,7 @@ { "data": { "text/html": [ - "
Is contralateral exploration justified in endoscopic total extraperitoneal repair of clinical unilateral groin hernias - A Prospective cohort study. - score: 1.63
" + "
0004014v2.pdf - score: 1.03
" ], "text/plain": [ "" @@ -713,7 +620,7 @@ { "data": { "text/html": [ - "In contrast to the high incidence (6/46, 13%) of CMIH in the non-exploration cohort, there was only one metachronous occurrence (1/68, 1.4%) after negative contralateral exploration at a median follow-up of longer than 3 yrs (p = 0.02)." + "energy including controlled fusion, medical science and molecular biology including genetic engineering, space exploitation and cosmology, climate and earth science, materials including semiconductors and composites, machine intelligence and robotics, financial modeling and commerce, and information communication and human-machine interface will …" ], "text/plain": [ "" @@ -801,7 +708,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 7, "id": "eea62a7d-7e0e-4a93-a89c-20c96560c665", "metadata": {}, "outputs": [], @@ -855,7 +762,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 8, "id": "13df9247-e784-4e04-9475-55e672efea47", "metadata": {}, "outputs": [], @@ -867,7 +774,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 9, "id": "7b0520b9-83b2-49fd-ad84-624cb0f15ce1", "metadata": {}, "outputs": [ @@ -891,7 +798,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 10, "id": "dcc7dae3-6b88-4ea6-be43-b178ebc559dc", "metadata": {}, "outputs": [ @@ -900,10 +807,10 @@ "text/plain": [ "{'question': 'Explain the difference between exploration and explotation',\n", " 'language': 'French',\n", - " 'text': \"L'exploration et l'exploitation sont deux concepts distincts qui sont souvent utilisés dans des contextes différents.\\n\\nL'exploration se réfère à l'action d'explorer, de découvrir ou de rechercher de nouvelles opportunités, connaissances ou ressources. C'est un processus de découverte qui implique souvent une certaine incertitude et un risque élevé. L'objectif principal de l'exploration est d'élargir les connaissances et de trouver de nouvelles possibilités.\\n\\nD'autre part, l'exploitation se concentre sur l'utilisation et la mise en valeur des ressources, connaissances ou opportunités déjà connues. C'est un processus plus stable et prévisible, qui vise à maximiser les bénéfices à partir des ressources existantes. L'exploitation implique généralement une utilisation efficace et rentable des ressources déjà découvertes.\\n\\nEn résumé, l'exploration est axée sur la découverte de nouvelles opportunités et connaissances, tandis que l'exploitation se concentre sur l'utilisation et la maximisation des ressources déjà connues.\"}" + " 'text': \"L'exploration et l'exploitation sont deux concepts distincts qui sont souvent utilisés dans des contextes différents.\\n\\nL'exploration se réfère à l'action d'explorer, de découvrir ou de rechercher de nouvelles opportunités, connaissances ou territoires. C'est un processus de découverte et d'expérimentation qui vise à trouver de nouvelles idées, de nouveaux marchés ou de nouvelles ressources. L'exploration implique souvent une certaine dose d'incertitude et de risque, car elle nécessite de sortir de sa zone de confort et d'essayer des choses nouvelles et inconnues.\\n\\nD'un autre côté, l'exploitation se concentre sur l'utilisation et la maximisation des ressources, des connaissances ou des opportunités déjà connues et maîtrisées. C'est le processus de tirer profit de ce qui est déjà connu et établi, en utilisant des méthodes et des stratégies éprouvées pour obtenir des résultats optimaux. L'exploitation est souvent associée à la consolidation, à l'efficacité et à la répétition de ce qui fonctionne déjà.\\n\\nEn résumé, l'exploration est axée sur la découverte de nouvelles possibilités et l'expérimentation, tandis que l'exploitation se concentre sur l'utilisation et la maximisation des ressources et des connaissances déjà acquises.\"}" ] }, - "execution_count": 52, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -988,7 +895,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 11, "id": "12682a1b-df92-49ce-a638-7277103f6cb3", "metadata": {}, "outputs": [], @@ -1008,7 +915,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 12, "id": "3bccca45-d1dd-476f-b109-a528b857b6b3", "metadata": {}, "outputs": [ @@ -1016,7 +923,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Number of results: 20\n" + "Number of results: 16\n" ] } ], @@ -1028,7 +935,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 13, "id": "7714f38a-daaa-4fc5-a95a-dd025d153216", "metadata": {}, "outputs": [], @@ -1047,7 +954,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 14, "id": "2937ba3b-098d-43f8-8498-3534882a5cc7", "metadata": {}, "outputs": [], @@ -1057,7 +964,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 15, "id": "f664df30-99c3-4a30-8cb0-42ba3044e5b0", "metadata": {}, "outputs": [ @@ -1065,8 +972,24 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 18 µs, sys: 0 ns, total: 18 µs\n", - "Wall time: 20.5 µs\n" + "Vectorizing 1 chunks from Document: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7206262/\n", + "Vectorizing 1 chunks from Document: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7135900/\n", + "Vectorizing 1 chunks from Document: https://doi.org/10.1016/j.ejmech.2016.11.027; https://www.ncbi.nlm.nih.gov/pubmed/27914364/\n", + "Vectorizing 1 chunks from Document: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3594938/\n", + "Vectorizing 52 chunks from Document: https://demodatasetsp.blob.core.windows.net/arxivcs/0006/0006012v1.pdf\n", + "Vectorizing 1 chunks from Document: https://doi.org/10.1017/s0950268808001544; https://www.ncbi.nlm.nih.gov/pubmed/19017431/\n", + "Vectorizing 1 chunks from Document: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1160249/\n", + "Vectorizing 1 chunks from Document: https://doi.org/10.1016/j.ciresp.2013.02.010; https://www.ncbi.nlm.nih.gov/pubmed/24559592/\n", + "Vectorizing 1 chunks from Document: https://doi.org/10.1016/j.ijsu.2016.10.012; https://www.ncbi.nlm.nih.gov/pubmed/27743897/\n", + "Vectorizing 8 chunks from Document: https://demodatasetsp.blob.core.windows.net/arxivcs/0012/0012003v1.pdf\n", + "Vectorizing 1 chunks from Document: https://doi.org/10.1016/j.surg.2014.06.020; https://www.ncbi.nlm.nih.gov/pubmed/25239339/\n", + "Vectorizing 1 chunks from Document: https://doi.org/10.1111/vsu.12693; https://www.ncbi.nlm.nih.gov/pubmed/28906566/\n", + "Vectorizing 9 chunks from Document: https://demodatasetsp.blob.core.windows.net/arxivcs/0007/0007001v1.pdf\n", + "Vectorizing 2 chunks from Document: https://demodatasetsp.blob.core.windows.net/arxivcs/0003/0003057v1.pdf\n", + "Vectorizing 35 chunks from Document: https://demodatasetsp.blob.core.windows.net/arxivcs/0004/0004001v1.pdf\n", + "Vectorizing 73 chunks from Document: https://demodatasetsp.blob.core.windows.net/arxivcs/0004/0004014v2.pdf\n", + "CPU times: user 8.98 s, sys: 185 ms, total: 9.17 s\n", + "Wall time: 42.2 s\n" ] } ], @@ -1143,7 +1066,7 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 16, "id": "61098bb4-33da-4eb4-94cf-503587337aca", "metadata": {}, "outputs": [ @@ -1180,7 +1103,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 17, "id": "7dfb9e39-2542-469d-8f64-4c0c26d79535", "metadata": {}, "outputs": [ @@ -1203,7 +1126,7 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 18, "id": "880885fe-16bd-44bb-9556-7cb3d4989993", "metadata": {}, "outputs": [ @@ -1213,11 +1136,11 @@ "text": [ "System prompt token count: 1669\n", "Max Completion Token count: 1000\n", - "Combined docs (context) token count: 1518\n", + "Combined docs (context) token count: 628\n", "--------\n", - "Requested token count: 4187\n", + "Requested token count: 3297\n", "Token limit for gpt-35-turbo : 4096\n", - "Chain Type selected: map_reduce\n" + "Chain Type selected: stuff\n" ] } ], @@ -1254,7 +1177,7 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 19, "id": "511273b3-256d-4e60-be72-ccd4a74cb885", "metadata": {}, "outputs": [], @@ -1271,7 +1194,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 20, "id": "b99a0c19-d48c-41e9-8d6c-6d9f13d29da3", "metadata": {}, "outputs": [ @@ -1279,8 +1202,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 17.5 ms, sys: 318 µs, total: 17.8 ms\n", - "Wall time: 13 s\n" + "CPU times: user 7.97 ms, sys: 4 µs, total: 7.97 ms\n", + "Wall time: 7.18 s\n" ] } ], @@ -1292,14 +1215,25 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 21, "id": "37f7fa67-f67b-402e-89e3-266d5d6d21d8", "metadata": {}, "outputs": [ { "data": { "text/markdown": [ - "Exploration refers to the process of actively seeking out and discovering new states or actions in the environment, while exploitation involves utilizing the knowledge or information already acquired to maximize rewards. In reinforcement learning, exploration is necessary to discover new strategies or actions that can lead to higher rewards, while exploitation focuses on using known effective strategies to maximize rewards in the current state of the environment[1]. The exploration-exploitation trade-off in reinforcement learning refers to the dilemma of selecting actions to maximize expected return according to the current world model or improving the world model to potentially achieve a higher expected return[2]. Additionally, the term \"exploration and exploitation\" can also be used in the context of designing and synthesizing effective inhibitors of proteases[3]." + "Exploration and exploitation are two different approaches in reinforcement learning. Exploration refers to the process of actively seeking out new and unfamiliar states or actions in order to gather more information about the environment. It involves taking actions that may not yield immediate rewards but have the potential to discover new knowledge. On the other hand, exploitation involves utilizing the existing knowledge or information to maximize the rewards or benefits. It focuses on taking actions that are known to be effective based on previous experiences.\n", + "\n", + "In the context of the first content, the paper discusses the challenge of sparse reward tasks in reinforcement learning and the need for both exploration and exploitation. It introduces two different approaches: self-imitation learning, which emphasizes exploitation by imitating past good trajectories, and exploration bonuses, which enhance exploration by providing intrinsic rewards for visiting novel states. The Explore-then-Exploit (EE) framework is then proposed, which combines these two approaches to strengthen their effects and achieve superior performance in MuJoCo environments with episodic reward settings. [1]\n", + "\n", + "In the context of the second content, the term \"exploit\" is used in a different context. It refers to how enveloped viruses take advantage of the existing routes of membrane traffic to enter and leave host cells. The similarity between viral envelopes and cellular membranes allows animal viruses to exploit these routes and use them for their own purposes. This exploitation of cellular mechanisms is used as a probe to study aspects of intracellular traffic. [2]\n", + "\n", + "In the context of the third content, the term \"exploration\" is used in the context of laparoscopic hernia repair. The study compares the occurrence of contralateral metachronous inguinal hernia (CMIH) after laparoscopic total extraperitoneal (TEP) repair with or without contralateral exploration. Contralateral exploration refers to the exploration of the asymptomatic contralateral inguinal hernia during the surgical procedure. The study found that simultaneous exploration and repair of the incidental defects on the contralateral inguinal region during laparoscopic TEP repair of unilateral inguinal hernia can effectively prevent later CMIH. [3]\n", + "\n", + "References:\n", + "[1] Source: [1]\n", + "[2] Source: [2]\n", + "[3] Source: [3]" ], "text/plain": [ "" @@ -1323,7 +1257,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "id": "11345374-6420-4b36-b061-795d2a804c85", "metadata": {}, "outputs": [], diff --git a/04-Complex-Docs.ipynb b/04-Complex-Docs.ipynb index d3dbe05f..0cf495d8 100644 --- a/04-Complex-Docs.ipynb +++ b/04-Complex-Docs.ipynb @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 7, "id": "15f6044e-463f-4988-bc46-a3c3d641c15c", "metadata": {}, "outputs": [], @@ -81,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, "id": "331692ba-b68e-4b99-9bae-5057da9a389d", "metadata": {}, "outputs": [], @@ -95,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 9, "id": "594ff0d4-56e3-4bed-843d-28c7a092069b", "metadata": {}, "outputs": [], @@ -123,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 10, "id": "0999e24b-6a75-4fa1-9a5f-426cf0f0bdba", "metadata": {}, "outputs": [], @@ -145,7 +145,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 11, "id": "3554f0b7-fee8-4446-a155-5d22dc0f0888", "metadata": {}, "outputs": [ @@ -153,7 +153,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 5/5 [00:03<00:00, 1.66it/s]\n" + "100%|██████████| 5/5 [00:02<00:00, 1.94it/s]\n" ] } ], @@ -182,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 12, "id": "c1c63a2f-7a53-4346-8a1f-483cfd159d34", "metadata": {}, "outputs": [ @@ -192,27 +192,27 @@ "text": [ "Extracting Text from Azure_Cognitive_Search_Documentation.pdf ...\n", "Extracting text using PyPDF\n", - "Parsing took: 35.292349 seconds\n", + "Parsing took: 35.197564 seconds\n", "Azure_Cognitive_Search_Documentation.pdf contained 1947 pages\n", "\n", "Extracting Text from Boundaries_When_to_Say_Yes_How_to_Say_No_to_Take_Control_of_Your_Life.pdf ...\n", "Extracting text using PyPDF\n", - "Parsing took: 1.988758 seconds\n", + "Parsing took: 1.992561 seconds\n", "Boundaries_When_to_Say_Yes_How_to_Say_No_to_Take_Control_of_Your_Life.pdf contained 357 pages\n", "\n", "Extracting Text from Fundamentals_of_Physics_Textbook.pdf ...\n", "Extracting text using PyPDF\n", - "Parsing took: 102.817881 seconds\n", + "Parsing took: 104.113405 seconds\n", "Fundamentals_of_Physics_Textbook.pdf contained 1450 pages\n", "\n", "Extracting Text from Made_To_Stick.pdf ...\n", "Extracting text using PyPDF\n", - "Parsing took: 8.007690 seconds\n", + "Parsing took: 7.991316 seconds\n", "Made_To_Stick.pdf contained 225 pages\n", "\n", "Extracting Text from Pere_Riche_Pere_Pauvre.pdf ...\n", "Extracting text using PyPDF\n", - "Parsing took: 1.103460 seconds\n", + "Parsing took: 1.082254 seconds\n", "Pere_Riche_Pere_Pauvre.pdf contained 225 pages\n", "\n" ] @@ -249,7 +249,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 13, "id": "f2a5d62f-b664-4662-a6c9-a1eb2a3c5e11", "metadata": {}, "outputs": [ @@ -258,24 +258,26 @@ "output_type": "stream", "text": [ "Azure_Cognitive_Search_Documentation.pdf \n", - " chunk text: Service update announcements for Azure Cognitive Search can be found on the Azu ...\n", + " chunk text: Month Item\n", + "October Beiersdorf customer story using Azure Cognitive Search . This ...\n", "\n", "Boundaries_When_to_Say_Yes_How_to_Say_No_to_Take_Control_of_Your_Life.pdf \n", - " chunk text: 26\n", - "father said, “Did I hear you right? You don’t think he has a\n", - "problem?”\n", - "“That’ ...\n", + " chunk text: 23\n", + "taking responsibility for others isn’t working. A master of taking\n", + "care of th ...\n", "\n", "Fundamentals_of_Physics_Textbook.pdf \n", - " chunk text: xxiiPREFACEInteractive Learningware.This software guides students through soluti ...\n", + " chunk text: 51-2TIME\n", + "Additional examples, video, and practice available at WileyPLUS1-2TIMEL ...\n", "\n", "Made_To_Stick.pdf \n", - " chunk text: of both fruits rather than all of either. Should we trade? If so, how do \n", - "we go ...\n", + " chunk text: UNEXPECTED \n", + "By FAA edict, a flight attendant must make a safety announce- \n", + "men ...\n", "\n", "Pere_Riche_Pere_Pauvre.pdf \n", - " chunk text: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~\n", - "~ ...\n", + " chunk text: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~\n", + " ...\n", "\n" ] } @@ -296,7 +298,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 14, "id": "801c6bc2-467c-4418-aa7e-ef89a1e20e1c", "metadata": {}, "outputs": [ @@ -305,8 +307,8 @@ "output_type": "stream", "text": [ "Extracting text using Azure Document Intelligence\n", - "CPU times: user 11.6 s, sys: 200 ms, total: 11.8 s\n", - "Wall time: 53.5 s\n" + "CPU times: user 11.8 s, sys: 192 ms, total: 12 s\n", + "Wall time: 1min 23s\n" ] } ], @@ -318,6 +320,17 @@ "book_pages_map[book]= book_map" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "52f8b4b9-c167-49cf-b88b-c6be705200b5", + "metadata": {}, + "outputs": [], + "source": [ + "#Note: If the above command throws an error - Create another form recognizer resource in the azure portal in the same resource group, don't delete it. And try again.\n", + "# This seems to be a transient error." + ] + }, { "cell_type": "code", "execution_count": 10, diff --git a/credentials.env b/credentials.env index bbae0ac4..0b6973f6 100644 --- a/credentials.env +++ b/credentials.env @@ -7,6 +7,7 @@ BING_SEARCH_URL = "https://api.bing.microsoft.com/v7.0/search" BLOB_CONNECTION_STRING="DefaultEndpointsProtocol=https;AccountName=demodatasetsp;AccountKey=QVFgIKPiWB+8f0mH+F7fidVLG7wq1S3WhtAqXOWaMWtr6fZ4frhVgmUzgBSdkmw4VsjoEAo7C2Hn+ASt2Cc5HA==;EndpointSuffix=core.windows.net" BLOB_SAS_TOKEN="?sv=2022-11-02&ss=bf&srt=sco&sp=rltfx&se=2024-10-02T01:02:07Z&st=2023-08-03T17:02:07Z&spr=https&sig=gLxStXFSY6X29OPpPDpBEhoQDdtJNDrMVExNYJ%2BhmBQ%3D" + # Edit with your own azure services values AZURE_SEARCH_ENDPOINT="Enter your Azure Cognitive Search Endpoint ..." AZURE_SEARCH_KEY="Enter your Azure Cognitive Search Key ..." # Make sure is the MANAGEMENT KEY no the query key @@ -24,4 +25,5 @@ SQL_SERVER_PASSWORD="ENTER YOUR VALUE" AZURE_COSMOSDB_ENDPOINT="ENTER YOUR VALUE" AZURE_COSMOSDB_NAME="ENTER YOUR VALUE" AZURE_COSMOSDB_CONTAINER_NAME="ENTER YOUR VALUE" -AZURE_COMOSDB_CONNECTION_STRING="ENTER YOUR VALUE" # Find this in the Keys section \ No newline at end of file +AZURE_COMOSDB_CONNECTION_STRING="ENTER YOUR VALUE" # Find this in the Keys section +