11
11
import sys
12
12
import time
13
13
from pathlib import Path
14
+ from textwrap import dedent
14
15
from typing import Any
15
16
from urllib .parse import parse_qsl , urlparse
16
17
@@ -176,7 +177,8 @@ def _check_user_scratch_subdir(self, path: Path) -> Path | None:
176
177
# Given a path we will test that SCRATCH_PATH/user/path can be
177
178
# created as a writable directory (or that it already exists
178
179
# as a writable directory). If it can be (or is), we return the
179
- # whole path, and if not, we return None.
180
+ # whole path, and if not, we return None. On success,
181
+ # we also set the SCRATCH_DIR environment variable to the user's top-level scratch directory (the directory that contains that path).
180
182
#
181
183
# This will only be readable by the user; they can chmod() it if
182
184
# they want to share, but for TMPDIR and DAF_BUTLER_CACHE_DIRECTORY
@@ -196,11 +198,12 @@ def _check_user_scratch_subdir(self, path: Path) -> Path | None:
196
198
self ._logger .warning ("Could not determine user from environment" )
197
199
return None
198
200
schema = self ._env .get ("HOMEDIR_SCHEMA" , "username" )
199
- user_scratch_path = scratch_path / user / path
201
+ user_scratch_dir = scratch_path / user
200
202
# This is pretty ad-hoc, but USDF uses the first letter in the
201
203
# username for both home and scratch
202
204
if schema == "initialThenUsername" :
203
- user_scratch_path = scratch_path / user [0 ] / user / path
205
+ user_scratch_dir = scratch_path / user [0 ] / user
206
+ user_scratch_path = user_scratch_dir / path
204
207
try :
205
208
user_scratch_path .mkdir (parents = True , exist_ok = True , mode = 0o700 )
206
209
except OSError as exc :
@@ -212,6 +215,8 @@ def _check_user_scratch_subdir(self, path: Path) -> Path | None:
212
215
self ._logger .warning (f"Unable to write to { user_scratch_path !s} " )
213
216
return None
214
217
self ._logger .debug (f"Using user scratch path { user_scratch_path !s} " )
218
+ # Set user-specific top dir as SCRATCH_DIR
219
+ self ._env ["SCRATCH_DIR" ] = f"{ user_scratch_dir !s} "
215
220
return user_scratch_path
216
221
217
222
def _set_tmpdir_if_scratch_available (self ) -> None :
@@ -241,16 +246,13 @@ def _set_butler_cache(self) -> None:
241
246
dbcd = self ._env .get (env_v , "" )
242
247
if dbcd :
243
248
self ._logger .debug (
244
- f"Not setting DAF_BUTLER_CACHE_DIRECTORY: already set to"
245
- f" { dbcd } "
249
+ f"Not setting { env_v } : already set to" f" { dbcd } "
246
250
)
247
251
return
248
252
temp_path = self ._check_user_scratch_subdir (Path ("butler_cache" ))
249
253
if temp_path :
250
254
self ._env [env_v ] = str (temp_path )
251
- self ._logger .debug (
252
- f"Set DAF_BUTLER_CACHE_DIRECTORY to { temp_path !s} "
253
- )
255
+ self ._logger .debug (f"Set { env_v } to { temp_path !s} " )
254
256
return
255
257
# In any sane RSP environment, /tmp will not be shared (it will
256
258
# be either tmpfs or on ephemeral storage, and in any case not
@@ -742,37 +744,30 @@ def _set_timeout_variables(self) -> list[str]:
742
744
result .append (f"--{ timeout_map [setting ]} ={ val } " )
743
745
return result
744
746
745
- def _make_abnormal_landing_page (self ) -> None :
746
- # This is very ad-hoc. Revisit after DP1.
747
- # What we're doing is writing in an empty, ephemeral filesystem,
748
- # to drop a document explaining what's going on, and to tweak the
749
- # display settings such that markdown is displayed in its rendered
750
- # form.
751
- abnormal = bool (self ._env .get ("ABNORMAL_STARTUP" , "" ))
752
- if not abnormal :
747
+ def _make_abnormal_startup_environment (self ) -> None :
748
+ # What we're doing is writing (we hope) someplace safe, be that
749
+ # an empty, ephemeral filesystem (such as /tmp in any sanely-configured
750
+ # K8s-based RSP) or in scratch space somewhere.
751
+ #
752
+ # Performance is irrelevant. As we explain to the user, they should
753
+ # not be using this lab for anything other than immediate problem
754
+ # amelioration.
755
+
756
+ # Try a sanity check and ensure that we are in fact in a broken state.
757
+ if not self ._broken :
753
758
return
754
- user = self ._env ["USER" ]
755
- home = self ._env .get ("NUBLADO_HOME" , "" ) or self ._env .get ("HOME" , "" )
756
- if not home :
757
- home = f"/home/{ user } " # We're just guessing at this point.
758
- txt = "# Abnormal startup\n "
759
- txt += "\n Your Lab container did not start normally.\n "
760
- txt += f"Error: `{ self ._env .get ("ABNORMAL_STARTUP_MESSAGE" ,"" )} `\n "
761
- txt += "\n If that looks like a file space error, try using the "
762
- txt += f"terminal to remove unneeded files in `{ home } `. You can "
763
- txt += "use the `quota` command to check how much space is in use. "
764
- txt += "After that, shut down and restart the Lab.\n "
765
- txt += "\n Otherwise, please open an issue with your RSP site"
766
- txt += " administrator.\n "
759
+
760
+ txt = self ._make_abnormal_landing_markdown ()
767
761
s_obj = {"defaultViewers" : {"markdown" : "Markdown Preview" }}
768
762
s_txt = json .dumps (s_obj )
769
763
770
764
try :
771
- welcome = Path ("/tmp/notebooks/tutorials/welcome.md" )
765
+ temphome = self ._env .get ("SCRATCH_DIR" , "/tmp" )
766
+ welcome = Path (temphome ) / "notebooks" / "tutorials" / "welcome.md"
772
767
welcome .parent .mkdir (exist_ok = True , parents = True )
773
768
welcome .write_text (txt )
774
769
settings = (
775
- Path ("/tmp" )
770
+ Path (temphome )
776
771
/ ".jupyter"
777
772
/ "lab"
778
773
/ "user-settings"
@@ -783,21 +778,114 @@ def _make_abnormal_landing_page(self) -> None:
783
778
settings .parent .mkdir (exist_ok = True , parents = True )
784
779
settings .write_text (s_txt )
785
780
except Exception :
786
- self ._logger .exception ("Writing abnormal startup files failed" )
781
+ self ._logger .exception (
782
+ "Writing files to report abnormal startup failed"
783
+ )
784
+
785
+ def _make_abnormal_landing_markdown (self ) -> str :
786
+ user = self ._env ["USER" ]
787
+ home = self ._env .get (
788
+ "NUBLADO_HOME" ,
789
+ self ._env .get (
790
+ "HOME" ,
791
+ f"/home/{ user } " , # Guess, albeit a good one.
792
+ ),
793
+ )
794
+
795
+ errmsg = self ._env .get ("ABNORMAL_STARTUP_MESSAGE" , "<no message>" )
796
+ errcode = self ._env .get ("ABNORMAL_STARTUP_ERRORCODE" , "EUNKNOWN" )
797
+
798
+ self ._logger .error (
799
+ f"Abnormal startup: errorcode { errcode } ; message { errmsg } "
800
+ )
801
+
802
+ open_an_issue = dedent (
803
+ f"""
804
+
805
+ Please open an issue with your RSP site administrator with the
806
+ following information: `{ errmsg } `
807
+ """
808
+ )
809
+
810
+ # Start with generic error text. It's very simple markdown, with a
811
+ # heading and literal text only.
812
+
813
+ txt = dedent ("""
814
+ # Abnormal startup
815
+
816
+ Your Lab container did not start normally.
817
+
818
+ Do not trust this lab for work you want to keep.
819
+
820
+ """ )
821
+
822
+ # Now add error-specific advice.
823
+ match errcode :
824
+ case "EDQUOT" :
825
+ txt += dedent (
826
+ f"""
827
+ You have exceeded your quota. Try using the terminal to
828
+ remove unneeded files in `{ home } `. You can use the
829
+ `quota` command to check your usage.
830
+
831
+ After that, shut down and restart the lab. If that does
832
+ not result in a working lab:
833
+ """
834
+ )
835
+ case "ENOSPC" :
836
+ txt += dedent (
837
+ f"""
838
+ You have run out of filesystem space. Try using the
839
+ terminal to remove unneeded files in `{ home } `. Since the
840
+ filesystem is full, this may not be something you can
841
+ correct.
842
+
843
+ After you have trimmed whatever possible, shut down and
844
+ restart the lab.
845
+
846
+ If that does not result in a working lab:
847
+ """
848
+ )
849
+ case "EROFS" | "EACCES" :
850
+ txt += dedent (
851
+ """
852
+ You do not have permission to write. Ask your RSP
853
+ administrator to check ownership and permissions on your
854
+ directories.
855
+ """
856
+ )
857
+ case "EBADENV" :
858
+ txt += dedent (
859
+ """
860
+ You are missing environment variables necessary for RSP
861
+ operation.
862
+ """
863
+ )
864
+ case _:
865
+ pass
866
+ txt += dedent (open_an_issue )
867
+ return txt
787
868
788
869
def _start (self ) -> None :
789
- abnormal = bool (self ._env .get ("ABNORMAL_STARTUP" , "" ))
790
870
log_level = "DEBUG" if self ._debug else "INFO"
791
871
notebook_dir = f"{ self ._home !s} "
792
- if abnormal :
872
+ if self . _broken :
793
873
self ._logger .warning (
794
874
f"Abnormal startup: { self ._env ['ABNORMAL_STARTUP_MESSAGE' ]} "
795
875
)
796
- self ._make_abnormal_landing_page ()
797
- self ._logger .warning ("Launching with homedir='/tmp'" )
798
- self ._env ["HOME" ] = "/tmp"
799
- os .environ ["HOME" ] = "/tmp"
800
- notebook_dir = "/tmp"
876
+ self ._make_abnormal_startup_environment ()
877
+ #
878
+ # We will check to see if we got SCRATCH_DIR set before we broke,
879
+ # and if so, use that, which would be a user-specific path on a
880
+ # scratch filesystem. If we didn't, we just use "/tmp" and hope
881
+ # for the best. Any reasonably-configured RSP running under K8s
882
+ # will not have a shared "/tmp".
883
+ #
884
+ temphome = self ._env .get ("SCRATCH_DIR" , "/tmp" )
885
+ self ._logger .warning (f"Launching with homedir='{ temphome } '" )
886
+ self ._env ["HOME" ] = temphome
887
+ os .environ ["HOME" ] = temphome
888
+ notebook_dir = temphome
801
889
802
890
cmd = [
803
891
"jupyterhub-singleuser" ,
0 commit comments