|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "code", |
| 5 | + "execution_count": 1, |
| 6 | + "id": "initial_id", |
| 7 | + "metadata": { |
| 8 | + "collapsed": true, |
| 9 | + "ExecuteTime": { |
| 10 | + "end_time": "2024-06-23T01:39:45.566588Z", |
| 11 | + "start_time": "2024-06-23T01:39:44.203426Z" |
| 12 | + } |
| 13 | + }, |
| 14 | + "outputs": [], |
| 15 | + "source": [ |
| 16 | + "from linkml_store import Client\n", |
| 17 | + "\n", |
| 18 | + "client = Client()" |
| 19 | + ] |
| 20 | + }, |
| 21 | + { |
| 22 | + "cell_type": "code", |
| 23 | + "execution_count": 2, |
| 24 | + "outputs": [], |
| 25 | + "source": [ |
| 26 | + "db = client.attach_database(\"mongodb://localhost:27017/nmdc\")" |
| 27 | + ], |
| 28 | + "metadata": { |
| 29 | + "collapsed": false, |
| 30 | + "ExecuteTime": { |
| 31 | + "end_time": "2024-06-23T01:39:45.571778Z", |
| 32 | + "start_time": "2024-06-23T01:39:45.566759Z" |
| 33 | + } |
| 34 | + }, |
| 35 | + "id": "73685fd7e60f63b2" |
| 36 | + }, |
| 37 | + { |
| 38 | + "cell_type": "code", |
| 39 | + "execution_count": 3, |
| 40 | + "outputs": [ |
| 41 | + { |
| 42 | + "data": { |
| 43 | + "text/plain": "['nmdc_schema_version',\n 'ids_nmdc_gfs0',\n 'nom_analysis_activity_set',\n 'read_qc_analysis_activity_set',\n 'roles']" |
| 44 | + }, |
| 45 | + "execution_count": 3, |
| 46 | + "metadata": {}, |
| 47 | + "output_type": "execute_result" |
| 48 | + } |
| 49 | + ], |
| 50 | + "source": [ |
| 51 | + "db.list_collection_names()[0:5]" |
| 52 | + ], |
| 53 | + "metadata": { |
| 54 | + "collapsed": false, |
| 55 | + "ExecuteTime": { |
| 56 | + "end_time": "2024-06-23T01:39:45.605798Z", |
| 57 | + "start_time": "2024-06-23T01:39:45.572040Z" |
| 58 | + } |
| 59 | + }, |
| 60 | + "id": "46fac5899adf5b7e" |
| 61 | + }, |
| 62 | + { |
| 63 | + "cell_type": "code", |
| 64 | + "execution_count": 4, |
| 65 | + "outputs": [], |
| 66 | + "source": [ |
| 67 | + "collection = db.get_collection(\"biosample_set\")\n" |
| 68 | + ], |
| 69 | + "metadata": { |
| 70 | + "collapsed": false, |
| 71 | + "ExecuteTime": { |
| 72 | + "end_time": "2024-06-23T01:39:45.608722Z", |
| 73 | + "start_time": "2024-06-23T01:39:45.605907Z" |
| 74 | + } |
| 75 | + }, |
| 76 | + "id": "fdc7d9bbd7a1ecae" |
| 77 | + }, |
| 78 | + { |
| 79 | + "cell_type": "code", |
| 80 | + "execution_count": 5, |
| 81 | + "outputs": [], |
| 82 | + "source": [ |
| 83 | + "qr = collection.find({}, limit=5)" |
| 84 | + ], |
| 85 | + "metadata": { |
| 86 | + "collapsed": false, |
| 87 | + "ExecuteTime": { |
| 88 | + "end_time": "2024-06-23T01:39:45.711560Z", |
| 89 | + "start_time": "2024-06-23T01:39:45.609722Z" |
| 90 | + } |
| 91 | + }, |
| 92 | + "id": "ed5a741307293cfa" |
| 93 | + }, |
| 94 | + { |
| 95 | + "cell_type": "code", |
| 96 | + "execution_count": 6, |
| 97 | + "outputs": [ |
| 98 | + { |
| 99 | + "data": { |
| 100 | + "text/plain": " id name \\\n0 gold:Gb0115231 Sand microcosm microbial communities from a hy... \n1 gold:Gb0115220 Sand microcosm microbial communities from a hy... \n2 gold:Gb0115221 Sand microcosm microbial communities from a hy... \n3 gold:Gb0115228 Sand microcosm microbial communities from a hy... \n4 gold:Gb0115225 Sand microcosm microbial communities from a hy... \n\n description \\\n0 Sterilized sand packs were incubated back in t... \n1 Sterilized sand packs were incubated back in t... \n2 Sterilized sand packs were incubated back in t... \n3 Sterilized sand packs were incubated back in t... \n4 Sterilized sand packs were incubated back in t... \n\n env_broad_scale \\\n0 {'has_raw_value': 'ENVO:01000253', 'term': {'i... \n1 {'has_raw_value': 'ENVO:01000253', 'term': {'i... \n2 {'has_raw_value': 'ENVO:01000253', 'term': {'i... \n3 {'has_raw_value': 'ENVO:01000253', 'term': {'i... \n4 {'has_raw_value': 'ENVO:01000253', 'term': {'i... \n\n env_local_scale \\\n0 {'has_raw_value': 'ENVO:01000621', 'term': {'i... \n1 {'has_raw_value': 'ENVO:01000621', 'term': {'i... \n2 {'has_raw_value': 'ENVO:01000621', 'term': {'i... \n3 {'has_raw_value': 'ENVO:01000621', 'term': {'i... \n4 {'has_raw_value': 'ENVO:01000621', 'term': {'i... \n\n env_medium type \\\n0 {'has_raw_value': 'ENVO:01000017', 'term': {'i... nmdc:Biosample \n1 {'has_raw_value': 'ENVO:01000017', 'term': {'i... nmdc:Biosample \n2 {'has_raw_value': 'ENVO:01000017', 'term': {'i... nmdc:Biosample \n3 {'has_raw_value': 'ENVO:01000017', 'term': {'i... nmdc:Biosample \n4 {'has_raw_value': 'ENVO:01000017', 'term': {'i... nmdc:Biosample \n\n collection_date \\\n0 {'has_raw_value': '2014-11-25'} \n1 {'has_raw_value': '2014-09-23'} \n2 {'has_raw_value': '2014-11-25'} \n3 {'has_raw_value': '2014-05-21'} \n4 {'has_raw_value': '2014-08-12'} \n\n depth \\\n0 {'has_raw_value': '0.5', 'has_numeric_value': ... \n1 {'has_raw_value': '0.5', 'has_numeric_value': ... \n2 {'has_raw_value': '0.5', 'has_numeric_value': ... \n3 {'has_raw_value': '0.5', 'has_numeric_value': ... \n4 {'has_raw_value': '0.5', 'has_numeric_value': ... \n\n geo_loc_name ... habitat \\\n0 {'has_raw_value': 'USA: Columbia River, Washin... ... sand microcosm \n1 {'has_raw_value': 'USA: Columbia River, Washin... ... sand microcosm \n2 {'has_raw_value': 'USA: Columbia River, Washin... ... sand microcosm \n3 {'has_raw_value': 'USA: Columbia River, Washin... ... sand microcosm \n4 {'has_raw_value': 'USA: Columbia River, Washin... ... sand microcosm \n\n location mod_date \\\n0 groundwater-surface water interaction zone in ... 2021-06-17 \n1 groundwater-surface water interaction zone in ... 2021-06-17 \n2 groundwater-surface water interaction zone in ... 2021-06-17 \n3 groundwater-surface water interaction zone in ... 2021-06-17 \n4 groundwater-surface water interaction zone in ... 2021-06-17 \n\n ncbi_taxonomy_name sample_collection_site part_of \\\n0 sediment metagenome sand microcosm [gold:Gs0114663] \n1 sediment metagenome sand microcosm [gold:Gs0114663] \n2 sediment metagenome sand microcosm [gold:Gs0114663] \n3 sediment metagenome sand microcosm [gold:Gs0114663] \n4 sediment metagenome sand microcosm [gold:Gs0114663] \n\n alternative_identifiers insdc_biosample_identifiers samp_name \\\n0 [img.taxon:3300042754] [biosample:SAMN06343877] GW-RW T4_25-Nov-14 \n1 [img.taxon:3300042744] [biosample:SAMN06343866] GW-RW T3_23-Sept-14 \n2 [img.taxon:3300042791] [biosample:SAMN06343867] GW-RW T3_25-Nov-14 \n3 [img.taxon:3300042751] [biosample:SAMN06343874] GW-RW T4_21-May-14 \n4 [img.taxon:3300042748] [biosample:SAMN06343871] GW-RW T4_12-Aug-14 \n\n gold_biosample_identifiers \n0 [gold:Gb0115231] \n1 [gold:Gb0115220] \n2 [gold:Gb0115221] \n3 [gold:Gb0115228] \n4 [gold:Gb0115225] \n\n[5 rows x 28 columns]", |
| 101 | + "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>id</th>\n <th>name</th>\n <th>description</th>\n <th>env_broad_scale</th>\n <th>env_local_scale</th>\n <th>env_medium</th>\n <th>type</th>\n <th>collection_date</th>\n <th>depth</th>\n <th>geo_loc_name</th>\n <th>...</th>\n <th>habitat</th>\n <th>location</th>\n <th>mod_date</th>\n <th>ncbi_taxonomy_name</th>\n <th>sample_collection_site</th>\n <th>part_of</th>\n <th>alternative_identifiers</th>\n <th>insdc_biosample_identifiers</th>\n <th>samp_name</th>\n <th>gold_biosample_identifiers</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>gold:Gb0115231</td>\n <td>Sand microcosm microbial communities from a hy...</td>\n <td>Sterilized sand packs were incubated back in t...</td>\n <td>{'has_raw_value': 'ENVO:01000253', 'term': {'i...</td>\n <td>{'has_raw_value': 'ENVO:01000621', 'term': {'i...</td>\n <td>{'has_raw_value': 'ENVO:01000017', 'term': {'i...</td>\n <td>nmdc:Biosample</td>\n <td>{'has_raw_value': '2014-11-25'}</td>\n <td>{'has_raw_value': '0.5', 'has_numeric_value': ...</td>\n <td>{'has_raw_value': 'USA: Columbia River, Washin...</td>\n <td>...</td>\n <td>sand microcosm</td>\n <td>groundwater-surface water interaction zone in ...</td>\n <td>2021-06-17</td>\n <td>sediment metagenome</td>\n <td>sand microcosm</td>\n <td>[gold:Gs0114663]</td>\n <td>[img.taxon:3300042754]</td>\n <td>[biosample:SAMN06343877]</td>\n <td>GW-RW T4_25-Nov-14</td>\n <td>[gold:Gb0115231]</td>\n </tr>\n <tr>\n <th>1</th>\n <td>gold:Gb0115220</td>\n <td>Sand microcosm microbial communities from a hy...</td>\n <td>Sterilized sand packs were incubated back in t...</td>\n <td>{'has_raw_value': 'ENVO:01000253', 'term': {'i...</td>\n <td>{'has_raw_value': 'ENVO:01000621', 'term': {'i...</td>\n <td>{'has_raw_value': 'ENVO:01000017', 'term': {'i...</td>\n <td>nmdc:Biosample</td>\n <td>{'has_raw_value': '2014-09-23'}</td>\n <td>{'has_raw_value': '0.5', 'has_numeric_value': ...</td>\n <td>{'has_raw_value': 'USA: Columbia River, Washin...</td>\n <td>...</td>\n <td>sand microcosm</td>\n <td>groundwater-surface water interaction zone in ...</td>\n <td>2021-06-17</td>\n <td>sediment metagenome</td>\n <td>sand microcosm</td>\n <td>[gold:Gs0114663]</td>\n <td>[img.taxon:3300042744]</td>\n <td>[biosample:SAMN06343866]</td>\n <td>GW-RW T3_23-Sept-14</td>\n <td>[gold:Gb0115220]</td>\n </tr>\n <tr>\n <th>2</th>\n <td>gold:Gb0115221</td>\n <td>Sand microcosm microbial communities from a hy...</td>\n <td>Sterilized sand packs were incubated back in t...</td>\n <td>{'has_raw_value': 'ENVO:01000253', 'term': {'i...</td>\n <td>{'has_raw_value': 'ENVO:01000621', 'term': {'i...</td>\n <td>{'has_raw_value': 'ENVO:01000017', 'term': {'i...</td>\n <td>nmdc:Biosample</td>\n <td>{'has_raw_value': '2014-11-25'}</td>\n <td>{'has_raw_value': '0.5', 'has_numeric_value': ...</td>\n <td>{'has_raw_value': 'USA: Columbia River, Washin...</td>\n <td>...</td>\n <td>sand microcosm</td>\n <td>groundwater-surface water interaction zone in ...</td>\n <td>2021-06-17</td>\n <td>sediment metagenome</td>\n <td>sand microcosm</td>\n <td>[gold:Gs0114663]</td>\n <td>[img.taxon:3300042791]</td>\n <td>[biosample:SAMN06343867]</td>\n <td>GW-RW T3_25-Nov-14</td>\n <td>[gold:Gb0115221]</td>\n </tr>\n <tr>\n <th>3</th>\n <td>gold:Gb0115228</td>\n <td>Sand microcosm microbial communities from a hy...</td>\n <td>Sterilized sand packs were incubated back in t...</td>\n <td>{'has_raw_value': 'ENVO:01000253', 'term': {'i...</td>\n <td>{'has_raw_value': 'ENVO:01000621', 'term': {'i...</td>\n <td>{'has_raw_value': 'ENVO:01000017', 'term': {'i...</td>\n <td>nmdc:Biosample</td>\n <td>{'has_raw_value': '2014-05-21'}</td>\n <td>{'has_raw_value': '0.5', 'has_numeric_value': ...</td>\n <td>{'has_raw_value': 'USA: Columbia River, Washin...</td>\n <td>...</td>\n <td>sand microcosm</td>\n <td>groundwater-surface water interaction zone in ...</td>\n <td>2021-06-17</td>\n <td>sediment metagenome</td>\n <td>sand microcosm</td>\n <td>[gold:Gs0114663]</td>\n <td>[img.taxon:3300042751]</td>\n <td>[biosample:SAMN06343874]</td>\n <td>GW-RW T4_21-May-14</td>\n <td>[gold:Gb0115228]</td>\n </tr>\n <tr>\n <th>4</th>\n <td>gold:Gb0115225</td>\n <td>Sand microcosm microbial communities from a hy...</td>\n <td>Sterilized sand packs were incubated back in t...</td>\n <td>{'has_raw_value': 'ENVO:01000253', 'term': {'i...</td>\n <td>{'has_raw_value': 'ENVO:01000621', 'term': {'i...</td>\n <td>{'has_raw_value': 'ENVO:01000017', 'term': {'i...</td>\n <td>nmdc:Biosample</td>\n <td>{'has_raw_value': '2014-08-12'}</td>\n <td>{'has_raw_value': '0.5', 'has_numeric_value': ...</td>\n <td>{'has_raw_value': 'USA: Columbia River, Washin...</td>\n <td>...</td>\n <td>sand microcosm</td>\n <td>groundwater-surface water interaction zone in ...</td>\n <td>2021-06-17</td>\n <td>sediment metagenome</td>\n <td>sand microcosm</td>\n <td>[gold:Gs0114663]</td>\n <td>[img.taxon:3300042748]</td>\n <td>[biosample:SAMN06343871]</td>\n <td>GW-RW T4_12-Aug-14</td>\n <td>[gold:Gb0115225]</td>\n </tr>\n </tbody>\n</table>\n<p>5 rows × 28 columns</p>\n</div>" |
| 102 | + }, |
| 103 | + "execution_count": 6, |
| 104 | + "metadata": {}, |
| 105 | + "output_type": "execute_result" |
| 106 | + } |
| 107 | + ], |
| 108 | + "source": [ |
| 109 | + "qr.rows_dataframe" |
| 110 | + ], |
| 111 | + "metadata": { |
| 112 | + "collapsed": false, |
| 113 | + "ExecuteTime": { |
| 114 | + "end_time": "2024-06-23T01:39:45.731557Z", |
| 115 | + "start_time": "2024-06-23T01:39:45.714621Z" |
| 116 | + } |
| 117 | + }, |
| 118 | + "id": "643c81991a449525" |
| 119 | + }, |
| 120 | + { |
| 121 | + "cell_type": "code", |
| 122 | + "execution_count": 6, |
| 123 | + "outputs": [], |
| 124 | + "source": [], |
| 125 | + "metadata": { |
| 126 | + "collapsed": false, |
| 127 | + "ExecuteTime": { |
| 128 | + "end_time": "2024-06-23T01:39:45.731830Z", |
| 129 | + "start_time": "2024-06-23T01:39:45.728994Z" |
| 130 | + } |
| 131 | + }, |
| 132 | + "id": "bb27cf2adebbc0da" |
| 133 | + } |
| 134 | + ], |
| 135 | + "metadata": { |
| 136 | + "kernelspec": { |
| 137 | + "display_name": "Python 3", |
| 138 | + "language": "python", |
| 139 | + "name": "python3" |
| 140 | + }, |
| 141 | + "language_info": { |
| 142 | + "codemirror_mode": { |
| 143 | + "name": "ipython", |
| 144 | + "version": 2 |
| 145 | + }, |
| 146 | + "file_extension": ".py", |
| 147 | + "mimetype": "text/x-python", |
| 148 | + "name": "python", |
| 149 | + "nbconvert_exporter": "python", |
| 150 | + "pygments_lexer": "ipython2", |
| 151 | + "version": "2.7.6" |
| 152 | + } |
| 153 | + }, |
| 154 | + "nbformat": 4, |
| 155 | + "nbformat_minor": 5 |
| 156 | +} |
0 commit comments