lanl
diff --git a/‎.gitignore
Lines changed: 3 additions & 0 deletions b/‎.gitignore
Lines changed: 3 additions & 0 deletions
diff --git a/‎CITATION.cff
Lines changed: 2 additions & 2 deletions b/‎CITATION.cff
Lines changed: 2 additions & 2 deletions
diff --git a/‎README.md
Lines changed: 2 additions & 1 deletion b/‎README.md
Lines changed: 2 additions & 1 deletion
diff --git a/‎TELF/__init__.py
Lines changed: 2 additions & 1 deletion b/‎TELF/__init__.py
Lines changed: 2 additions & 1 deletion
diff --git a/‎TELF/applications/Cheetah/term_formatter.py
Lines changed: 129 additions & 77 deletions b/‎TELF/applications/Cheetah/term_formatter.py
Lines changed: 129 additions & 77 deletions
diff --git a/‎TELF/applications/Lynx/__init__.py b/‎TELF/applications/Lynx/__init__.py
diff --git a/‎TELF/applications/Lynx/backend/__init__.py b/‎TELF/applications/Lynx/backend/__init__.py
diff --git a/‎TELF/applications/Lynx/frontend/__init__.py b/‎TELF/applications/Lynx/frontend/__init__.py
diff --git a/‎TELF/applications/Lynx/frontend/main.py
Lines changed: 21 additions & 0 deletions b/‎TELF/applications/Lynx/frontend/main.py
Lines changed: 21 additions & 0 deletions
diff --git a/‎TELF/applications/Lynx/frontend/pages/__init__.py b/‎TELF/applications/Lynx/frontend/pages/__init__.py
@@ -1,3 +1,6 @@
+example_out
+example_results
+hidden_keys.py
 example_output/
 VOCAB_CONSOLIDATOR_SubstitutionOperator.p
 VOCAB_CONSOLIDATOR_changes.csv
 
@@ -1,4 +1,4 @@
-version: 0.0.41
+version: 0.0.42
 message: "If you use this software, please cite it as below."
 authors:
   - family-names: Eren
@@ -20,7 +20,7 @@ authors:
   - family-names: Alexandrov
     given-names: Boian
 title: "Tensor Extraction of Latent Features (T-ELF)"
-version: 0.0.41
+version: 0.0.42
 url: https://github.com/lanl/T-ELF
 doi: 10.5281/zenodo.10257897
 date-released: 2023-12-04
@@ -128,6 +128,7 @@ python post_install.py # use the following, for example, for GPU system: <python
 |   Cheetah  |                        Fast search by keywords and phrases                       |    [Link](examples/Cheetah)         |
 |    Bunny   | Dataset generation tool for documents and their citations/references |  [Link](examples/Bunny)  |
 |  Penguin   |         Text storage tool                                    | [Link](examples/Penguin) |
+|  Lynx   |         Streamlit UI                                    | [Link](examples/Lynx) |
 |    Termite   | Knowledge graph building tool | :soon: |
 
 
@@ -136,7 +137,7 @@ python post_install.py # use the following, for example, for GPU system: <python
 | **Example** |                            **Description**                           | **Link** |
 |:----------:|:--------------------------------------------------------------------:|:-----------:|
 |   NM Law Data           |                        Domain specific data for AI and RAG system written in our  [paper](https://arxiv.org/abs/2502.20364) about New Mexico Law that uses the TELF pipeline       |  [Link](examples/NM%20Law%20Data)|
-|    Full TELF Pipeline   | An end-to-end pipeline demonstration, from data collection to analysis | :soon:   |
+|    Full TELF Pipeline   | An end-to-end pipeline demonstration, from collection to analysis |  [Link](examples/Full%20TELF%20Pipeline)  |
 
 
 ## How to Cite T-ELF?
 
@@ -4,4 +4,5 @@
 sys.path += ["pre_processing"]
 sys.path += ["post_processing"]
 sys.path += ["applications"]
-sys.path += ["helpers"]
+sys.path += ["helpers"]
+sys.path += ["pipeline"]
@@ -1,49 +1,76 @@
 import os
 import warnings
-import pandas as pd
-from .cheetah import Cheetah
+from typing import Dict, List, Tuple, Any, Optional, Set
+from ...helpers.terms import resolve_substitution_conflicts
+
 
 class CheetahTermFormatter:
     """
     Loads search terms from a Markdown file and returns them as
     plain strings or dict blocks, with optional category filtering.
-    Can also generate a substitutions lookup dict mapping phrases
-    to underscored forms and back, if substitutions=True.
-    
-    New parameters:
-      all_categories (bool): if True, ignore `category` and
-        `include_general` and include every section.
+
+    Optionally generates a substitution lookup map (with underscore variants),
+    and can drop conflicts if requested.
+
+    Parameters
+    ----------
+    markdown_file : str | Path
+        Path to the .md file to load.
+    lower : bool
+        Whether to lowercase all term headers.
+    category : str | None
+        If set, include only `# Category: <category>` sections.
+    include_general : bool
+        If filtering by category, whether to include pre-category terms.
+    substitutions : bool
+        If True, builds substitution maps.
+    all_categories : bool
+        If True, overrides `category` and `include_general`.
+    drop_conflicts : bool
+        If True, resolve substitution conflicts and prune dropped entries.
+        If False, keep all substitutions as-is (even if conflicting).
     """
-    def __init__(self, markdown_file, lower=False, category=None,
-                 include_general=True, substitutions=False, all_categories=False):
+
+    def __init__(
+        self,
+        markdown_file,
+        lower: bool = False,
+        category: Optional[str] = None,
+        include_general: bool = True,
+        substitutions: bool = False,
+        all_categories: bool = False,
+        drop_conflicts: bool = True,
+    ):
         self.markdown_file    = markdown_file
         self.lower            = lower
         self.category         = category
         self.include_general  = include_general
         self.substitutions    = substitutions
         self.all_categories   = all_categories
+        self.drop_conflicts   = drop_conflicts
 
-        self.substitution_forward = {}
-        self.substitution_reverse = {}
+        self.substitution_forward: Dict[str, str] = {}
+        self.substitution_reverse: Dict[str, str] = {}
 
-        # parse the markdown into self.terms
-        self.terms = self._parse_markdown()
+        # parse markdown → raw terms list
+        self.terms: List[Any] = self._parse_markdown()
 
-        # optionally build lookup table
+        # optionally build lookup tables
         if self.substitutions:
             self._build_substitutions_lookup()
+            if self.drop_conflicts:
+                self._postprocess_conflicts()
 
-
-    def _parse_markdown(self):
-        terms = []
-        current_term = None
-        positives = []
-        negatives = []
-        active_block = False
-        current_section = None
+    # ──────────────────────────────────────────────────────────────── #
+    # markdown parsing                                                #
+    # ──────────────────────────────────────────────────────────────── #
+    def _parse_markdown(self) -> List[Any]:
+        terms: List[Any] = []
+        current_term, positives, negatives = None, [], []
+        active_block, current_section = False, None
 
         try:
-            with open(self.markdown_file, 'r', encoding='utf-8') as f:
+            with open(self.markdown_file, "r", encoding="utf-8") as f:
                 lines = f.readlines()
         except FileNotFoundError:
             warnings.warn(f"File '{self.markdown_file}' not found. Returning empty list.")
@@ -52,120 +79,145 @@ def _parse_markdown(self):
         for raw in lines:
             line = raw.strip()
 
-            # Section header
             if line.startswith("# Category:"):
                 current_section = line.split(":", 1)[1].strip()
                 continue
 
-            # Decide whether to include this section
-            if self.all_categories:
-                include_section = True
-            elif self.category is None:
-                # no filtering → include everything
-                include_section = True
-            else:
-                if current_section is None and self.include_general:
-                    include_section = True
-                else:
-                    include_section = (current_section == self.category)
+            include_section = self.all_categories or self.category is None
+            if self.category and not self.all_categories:
+                include_section = (current_section == self.category) or (
+                    current_section is None and self.include_general
+                )
 
-            # Term header
             if line.startswith("##"):
-                # finish previous block
                 if current_term is not None and active_block:
                     if positives or negatives:
-                        terms.append({
-                            current_term: {
-                                "positives": positives,
-                                "negatives": negatives
-                            }
-                        })
+                        terms.append({current_term: {"positives": positives, "negatives": negatives}})
                     else:
                         terms.append(current_term)
 
-                # reset for new block
-                positives = []
-                negatives = []
+                positives, negatives = [], []
                 header = line.lstrip("#").strip()
                 if self.lower:
                     header = header.lower()
-                current_term = header
-                active_block = include_section
+                current_term  = header
+                active_block  = include_section
 
-            # collect positives / negatives
-            elif active_block and line.lower().startswith("must have:"):
+            elif active_block and line.lower().startswith("positives:"):
                 items = [i.strip() for i in line.split(":", 1)[1].split(",") if i.strip()]
                 positives.extend(items)
-            elif active_block and line.lower().startswith("exclude with:"):
+
+            elif active_block and line.lower().startswith("negatives:"):
                 items = [i.strip() for i in line.split(":", 1)[1].split(",") if i.strip()]
                 negatives.extend(items)
 
-        # final block
         if current_term is not None and active_block:
             if positives or negatives:
-                terms.append({
-                    current_term: {
-                        "positives": positives,
-                        "negatives": negatives
-                    }
-                })
+                terms.append({current_term: {"positives": positives, "negatives": negatives}})
             else:
                 terms.append(current_term)
 
         return terms
 
-    def _build_substitutions_lookup(self):
-        """
-        Build a dict mapping each term to its underscored form and vice versa.
-        """
+    # ──────────────────────────────────────────────────────────────── #
+    # substitutions lookup                                            #
+    # ──────────────────────────────────────────────────────────────── #
+    def _build_substitutions_lookup(self) -> None:
+        """Create forward & reverse maps (no filtering yet)."""
         for entry in self.terms:
             if isinstance(entry, str):
                 term = entry
                 underscored = term.replace(" ", "_")
                 self.substitution_forward[term] = underscored
                 self.substitution_reverse[underscored] = term
-            elif isinstance(entry, dict):
+            else:  # dict
                 for term in entry.keys():
                     underscored = term.replace(" ", "_")
                     self.substitution_forward[term] = underscored
                     self.substitution_reverse[underscored] = term
 
+    def _postprocess_conflicts(self) -> None:
+        """Resolve substitution conflicts and prune dropped terms."""
+        clean_forward, dropped = resolve_substitution_conflicts(  # presumably returns (cleaned forward map, dropped terms) -- verify in helpers.terms
+            self.substitution_forward, warn=True
+        )
+        self.substitution_forward = clean_forward
+
+        # rebuild reverse map from the cleaned forward map: target -> [sources]
+        rev: Dict[str, List[str]] = {}  # NOTE(review): list values, unlike the str values stored by _build_substitutions_lookup
+        for src, tgt in clean_forward.items():
+            rev.setdefault(tgt, []).append(src)
+        self.substitution_reverse = rev  # NOTE(review): attribute is annotated Dict[str, str] in __init__ -- confirm callers accept lists
+
+        if not dropped:  # nothing was removed, so self.terms needs no pruning
+            return
+
+        # prune self.terms to match cleaned substitutions
+        pruned_terms: List[Any] = []
+        for entry in self.terms:
+            if isinstance(entry, str):  # plain term
+                if entry not in dropped:
+                    pruned_terms.append(entry)
+            else:  # dict block of the form {term: {"positives": [...], "negatives": [...]}}
+                kept = {k: v for k, v in entry.items() if k not in dropped}
+                if kept:  # skip blocks whose every key was dropped
+                    pruned_terms.append(kept)
+        self.terms = pruned_terms
+
+    # ──────────────────────────────────────────────────────────────── #
+    # public access                                                   #
+    # ──────────────────────────────────────────────────────────────── #
+    def get_terms(self) -> List[Any]:  # NOTE(review): redefined without annotations further down this class; the later definition wins, so this copy is dead
+        return self.terms
+
+    def get_substitution_maps(self) -> Tuple[Dict[str, str], Dict[str, str]]:  # NOTE(review): shadowed by the later definition in this class; reverse map holds lists after _postprocess_conflicts
+        return self.substitution_forward, self.substitution_reverse
+
+   
+    # ──────────────────────────────────────────────────────────────── #
+    # public helpers                                                  #
+    # ──────────────────────────────────────────────────────────────── #
     def get_terms(self):
         return self.terms
 
     def get_substitution_maps(self):
-        """
-        Return the substitutions lookup dict (empty if substitutions=False).
-        """
+        """Return (forward_map, reverse_map)."""
         return self.substitution_forward, self.substitution_reverse
 
 
+# ═══════════════════════════════════════════════════════════════════ #
+# utility: convert TXT dump → cheetah markdown                       #
+# ═══════════════════════════════════════════════════════════════════ #
 def convert_txt_to_cheetah_markdown(txt_path, markdown_path):
+    """
+    Helper to convert a simple TXT list (optionally containing dict literals)
+    into the markdown format expected by CheetahTermFormatter.
+    """
     import ast
 
-    with open(txt_path, 'r', encoding='utf-8') as f:
+    with open(txt_path, "r", encoding="utf-8") as f:
         lines = [line.strip() for line in f if line.strip()]
 
-    markdown_lines = []
+    md_lines: List[str] = []
 
     for line in lines:
         if line.startswith("{") and line.endswith("}"):
             try:
                 parsed = ast.literal_eval(line)
                 for key, value in parsed.items():
-                    positives = [v.lstrip('+') for v in value if v.startswith('+')]
-                    negatives = [v for v in value if not v.startswith('+')]
-                    markdown_lines.append(f"## {key}")
+                    positives = [v.lstrip("+") for v in value if v.startswith("+")]
+                    negatives = [v for v in value if not v.startswith("+")]
+                    md_lines.append(f"## {key}")
                     if positives:
-                        markdown_lines.append(f"positives: {', '.join(positives)}")
+                        md_lines.append(f"positives: {', '.join(positives)}")
                     if negatives:
-                        markdown_lines.append(f"negatives: {', '.join(negatives)}")
+                        md_lines.append(f"negatives: {', '.join(negatives)}")
             except Exception as e:
                 print(f"Skipping line due to parse error: {line}\nError: {e}")
         else:
-            markdown_lines.append(f"## {line.strip()}")
+            md_lines.append(f"## {line.strip()}")
 
-    with open(markdown_path, 'w', encoding='utf-8') as f:
-        f.write("\n".join(markdown_lines))
+    with open(markdown_path, "w", encoding="utf-8") as f:
+        f.write("\n".join(md_lines))
 
     print(f"Converted markdown saved to: {markdown_path}")
@@ -0,0 +1,21 @@
+import os
+import sys
+import streamlit as st
+
+sys.path.extend(["pages", os.path.join("..", "backend"), ".."])  # relative entries; NOTE(review): presumably launched from this folder -- confirm
+
+if "project_loaded" not in st.session_state:  # first run of this session: no project loaded yet
+    st.session_state.project_loaded = False
+
+load_project_page = st.Page(os.path.join("pages", "load_project.py"), title="Load Project", icon=":material/flag:", default=True)
+tree_view_page = st.Page(os.path.join("pages", "tree_view.py"), title="Tree Search", icon=":material/allergy:", default=False)
+document_analysis_view_page = st.Page(os.path.join("pages", "doc_view.py"), title="Document Analysis", icon=":material/lan:", default=False)
+link_view_page = st.Page(os.path.join("pages", "link_view.py"), title="Link Prediction", icon=":material/linked_services:", default=False)
+
+pg = st.navigation(  # sidebar sections mapped to their pages
+    {
+        "Lynx": [load_project_page],
+        "Views": [tree_view_page, document_analysis_view_page, link_view_page],
+    }
+)
+pg.run()