Source code for probnum.problems.zoo.linalg._suitesparse_matrix

"""Sparse matrices from the SuiteSparse Matrix Collection."""

import codecs
import csv
import io
import tarfile
from typing import Dict, Union

import numpy as np
import scipy.io

import probnum.linops as linops

# Base URL of the SuiteSparse Matrix Collection website and the URL of its
# CSV index, which lists one matrix per row.
SUITESPARSE_ROOT_URL = "https://sparse.tamu.edu"
SUITESPARSE_INDEX_URL = SUITESPARSE_ROOT_URL + "/files/ssstats.csv"


def suitesparse_matrix(
    name: str,
    group: str,
    verbose: bool = False,
) -> "SuiteSparseMatrix":
    """Sparse matrix from the SuiteSparse Matrix Collection.

    Download a sparse matrix benchmark from the `SuiteSparse Matrix Collection
    <https://sparse.tamu.edu/>`_. [1]_ [2]_

    The collection's CSV index is fetched and scanned for the row whose
    ``group`` and ``name`` columns match the arguments. The one-based position
    of that row in the index is stored as the matrix identifier (``matid``).

    Parameters
    ----------
    name :
        Name of the matrix.
    group :
        Group of the matrix.
    verbose :
        Print a message when the database index is queried.

    Returns
    -------
    SuiteSparseMatrix
        The matrix built from the matching index entry.

    Raises
    ------
    ImportError
        If the optional dependency ``requests`` is not installed.
    ValueError
        If no index entry matches the given ``group`` and ``name``.

    References
    ----------
    .. [1] Davis, TA and Hu, Y. The University of Florida sparse matrix
           collection. *ACM Transactions on Mathematical Software (TOMS)* 38.1
           (2011): 1-25.
    .. [2] Kolodziej, Scott P., et al. The SuiteSparse matrix collection website
           interface. *Journal of Open Source Software* 4.35 (2019): 1244.

    Examples
    --------
    >>> ssmat = suitesparse_matrix(name="ash85", group="HB")
    >>> ssmat
    <SuiteSparseMatrix with shape=(85, 85) and dtype=float64>
    >>> ssmat.trace()
    85.0
    """
    # Get database index
    try:
        import requests  # pylint: disable=import-outside-toplevel
    except ImportError as err:
        # Adjacent string literals are concatenated into ONE message; separating
        # them with commas would make the exception carry a tuple of fragments.
        raise ImportError(
            "Cannot query SuiteSparse Matrix collection without "
            "optional dependency `requests`. Install ProbNum with "
            "optional dependencies for the problem zoo via "
            "`pip install probnum[zoo]` or install requests "
            "directly: `pip install requests`."
        ) from err

    # The second positional argument of ``requests.get`` is ``params`` (query
    # string), not a file mode, so nothing but the URL is passed here.
    response = requests.get(SUITESPARSE_INDEX_URL)
    line_gen = response.iter_lines()
    for _ in range(2):
        # Skip lines not part of the matrix table. The default avoids leaking
        # ``StopIteration`` if the response is unexpectedly short.
        next(line_gen, None)

    # Read index with custom header
    fieldnames = [
        "group",
        "name",
        "nrows",
        "ncols",
        "nnz",
        "real",
        "logical",
        "is2d3d",
        "isspd",
        "psym",
        "nsym",
        "kind",
    ]
    databaseindex_reader = csv.DictReader(
        codecs.iterdecode(line_gen, "utf-8"), fieldnames=fieldnames
    )

    # Query the SuiteSparse Matrix collection
    if verbose:
        print("Querying SuiteSparse Collection.")

    matrix_attr_dict = None
    for matid, row in enumerate(databaseindex_reader, start=1):
        if row["group"] == group and row["name"] == name:
            matrix_attr_dict = row
            matrix_attr_dict["matid"] = f"{matid}"
            # Stop at the match; the rest of the index need not be parsed.
            break

    if matrix_attr_dict is None:
        raise ValueError(
            f"Could not find matrix '{name}' in the SuiteSparse database index."
        )

    # Create a SuiteSparseMatrix from the matching index entry
    return SuiteSparseMatrix.from_database_entry(matrix_attr_dict)
class SuiteSparseMatrix(linops.Matrix):
    """SuiteSparse Matrix.

    Sparse matrix from the `SuiteSparse Matrix Collection
    <https://sparse.tamu.edu/>`_. [1]_ [2]_

    Constructing an instance downloads the matrix archive from the collection
    website and passes the parsed matrix to the parent ``linops.Matrix``.

    Parameters
    ----------
    matid :
        Unique identifier for the matrix in the database.
    group :
        Group this matrix belongs to.
    name :
        Name of this matrix.
    nnz :
        Number of non-zero elements.
    is2d3d:
        Does this matrix come from a 2D or 3D discretization?
    isspd :
        Is this matrix symmetric, positive definite?
    psym :
        Degree of symmetry of the matrix pattern.
    nsym :
        Degree of numerical symmetry of the matrix.
    kind :
        Information of the problem domain this matrix arises from.

    References
    ----------
    .. [1] Davis, TA and Hu, Y. The University of Florida sparse matrix
           collection. *ACM Transactions on Mathematical Software (TOMS)* 38.1
           (2011): 1-25.
    .. [2] Kolodziej, Scott P., et al. The SuiteSparse matrix collection website
           interface. *Journal of Open Source Software* 4.35 (2019): 1244.
    """

    # pylint: disable="too-many-instance-attributes,too-many-arguments,abstract-method"

    def __init__(
        self,
        matid: str,
        group: str,
        name: str,
        nnz: int,
        is2d3d: bool,
        isspd: bool,
        psym: float,
        nsym: float,
        kind: str,
    ):
        self.matid = matid
        self.group = group
        self.name = name
        self.nnz = nnz
        self.is2d3d = is2d3d
        self.isspd = isspd
        self.psym = psym
        self.nsym = nsym
        self.kind = kind
        # ``group`` and ``name`` must be set before downloading: they form the
        # archive URL.
        super().__init__(A=self._download())

    @classmethod
    def from_database_entry(cls, database_entry: Dict) -> "SuiteSparseMatrix":
        """Create a SuiteSparseMatrix object from an entry of the database index.

        Parameters
        ----------
        database_entry :
            Dictionary representing one entry from the SuiteSparse database
            index. It must provide the keys ``matid``, ``group``, ``name``,
            ``nnz``, ``is2d3d``, ``isspd``, ``psym``, ``nsym`` and ``kind``.
            Numeric and boolean fields are parsed from their textual form
            (booleans via ``bool(int(...))``).
        """
        return cls(
            matid=database_entry["matid"],
            group=database_entry["group"],
            nnz=int(database_entry["nnz"]),
            is2d3d=bool(int(database_entry["is2d3d"])),
            isspd=bool(int(database_entry["isspd"])),
            psym=float(database_entry["psym"]),
            nsym=float(database_entry["nsym"]),
            name=database_entry["name"],
            kind=database_entry["kind"],
        )

    def _download(
        self, verbose: bool = False
    ) -> Union[np.ndarray, scipy.sparse.coo_matrix]:
        """Download and extract file archive containing the sparse matrix.

        The gzipped tar archive is streamed into an in-memory buffer and its
        first member is parsed as a Matrix Market file.

        Parameters
        ----------
        verbose :
            Print additional information.

        Raises
        ------
        ImportError
            If the optional dependency ``requests`` is not installed.
        """
        try:
            import requests  # pylint: disable=import-outside-toplevel
        except ImportError as err:
            # Adjacent literals concatenate into ONE message; commas would turn
            # the exception arguments into a tuple of fragments.
            raise ImportError(
                "Cannot query SuiteSparse Matrix collection without "
                "optional dependency `requests`. Install ProbNum with "
                "optional dependencies for the problem zoo via "
                "`pip install probnum[zoo]` or install requests "
                "directly: `pip install requests`."
            ) from err

        # The progress bar is optional; resolve it up front so the ``try`` only
        # guards the import and not the download loop itself.
        try:
            from tqdm.auto import tqdm  # pylint: disable=import-outside-toplevel
        except ImportError:
            tqdm = None

        url = SUITESPARSE_ROOT_URL + f"/MM/{self.group}/{self.name}.tar.gz"
        buffer = io.BytesIO()

        # Context manager releases the streamed connection even on errors.
        with requests.get(url, stream=True) as response:
            # Write archive to in-memory buffer
            if verbose:
                print("Downloading compressed matrix.")
            chunk_iter = response.iter_content(chunk_size=4096)

            if tqdm is None:
                for chunk in chunk_iter:
                    buffer.write(chunk)
            else:
                # The header may be missing (e.g. chunked transfer); tqdm
                # accepts ``total=None`` and then shows no percentage.
                content_length = response.headers.get("content-length")
                total = int(content_length) if content_length is not None else None
                with tqdm(
                    total=total,
                    desc=self.name,
                    unit="B",
                    unit_scale=True,
                ) as pbar:
                    for chunk in chunk_iter:
                        buffer.write(chunk)
                        # Advance by the bytes actually received; the last
                        # chunk is usually shorter than the requested size.
                        pbar.update(len(chunk))

        buffer.seek(0)

        if verbose:
            print("Extracting file archive.")
        with tarfile.open(fileobj=buffer, mode="r:gz") as tar:
            return scipy.io.mmread(tar.extractfile(tar.getmembers()[0]))

    @staticmethod
    def _html_header() -> str:
        """Header of the HTML representation of a SuiteSparseMatrix."""
        return (
            "<thead>"
            + "".join(
                f"<th>{attr}</th>"
                for attr in [
                    "ID",
                    "Group",
                    "Name",
                    "Rows",
                    "Cols",
                    "Nonzeros",
                    "DType",
                    "2D/3D Discretization",
                    "SPD",
                    "Pattern Symmetry",
                    "Numerical Symmetry",
                    "Domain",
                    "Preview",
                ]
            )
            + "</thead>"
        )

    def _to_html_row(self) -> str:
        """Table row of the HTML representation of a SuiteSparseMatrix.

        The cells follow the column order of :meth:`_html_header`. The group
        and name cells link to the corresponding collection pages and the last
        cell embeds the preview image.
        """
        group_url = SUITESPARSE_ROOT_URL + "/" + self.group
        matrix_url = group_url + "/" + self.name
        # Built on one line: a backslash continuation inside the literal would
        # leak the continuation's whitespace into the URL.
        preview_url = f"{SUITESPARSE_ROOT_URL}/files/{self.group}/{self.name}.png"

        table_items = [
            self.matid,
            f'<a href="{group_url}" target="_blank">{self.group}</a>',
            f'<a href="{matrix_url}" target="_blank">{self.name}</a>',
            self.shape[0],
            self.shape[1],
            self.nnz,
            self.dtype,
            self.is2d3d,
            self.isspd,
            f"{self.psym:.2}",
            f"{self.nsym:.2}",
            self.kind,
            f'<img src="{preview_url}">',
        ]
        return (
            "<tr>"
            + "".join(f"<td>{table_item}</td>" for table_item in table_items)
            + "</tr>"
        )

    def _repr_html_(self) -> str:
        """HTML representation."""
        return (
            f"<table>{self._html_header()}"
            + f"<tbody>{self._to_html_row()}</tbody></table>"
        )