Query artifacts¶

Here, we’ll query artifacts and inspect their metadata.

You can skip this guide if you are only interested in working with the overall collection.

import lamindb as ln
import bionty as bt

💡 connected lamindb: testuser1/test-scrna

ln.settings.transform.stem_uid = "agayZTonayqA"
ln.settings.transform.version = "1"
ln.track()

💡 notebook imports: bionty==0.43.1 lamindb==0.72.1

💡 saved: Transform(uid='agayZTonayqA5zKv', version='1', name='Query artifacts', key='scrna3', type='notebook', created_by_id=1, updated_at='2024-05-29 09:58:47 UTC')

💡 saved: Run(uid='YaiyFV65P7CGt85tZiOs', transform_id=3, created_by_id=1)

Run(uid='YaiyFV65P7CGt85tZiOs', started_at='2024-05-29 09:58:47 UTC', is_consecutive=True, transform_id=3, created_by_id=1)

Query artifacts by provenance metadata¶

users = ln.User.lookup()

ln.Transform.filter(created_by=users.testuser1).search("scrna").df()

	uid	version	name	key	description	type	reference	reference_type	latest_report_id	source_code_id	created_by_id	updated_at
id
1	Nv48yAceNSh85zKv	1	scRNA-seq	scrna	None	notebook	None	None	None	None	1	2024-05-29 09:56:21.663230+00:00
2	ManDYgmftZ8C5zKv	1	Standardize and append a batch of data	scrna2	None	notebook	None	None	None	None	1	2024-05-29 09:58:22.270273+00:00
3	agayZTonayqA5zKv	1	Query artifacts	scrna3	None	notebook	None	None	None	None	1	2024-05-29 09:58:47.794891+00:00

transform = ln.Transform.filter(uid="Nv48yAceNSh85zKv").one()

ln.Artifact.filter(transform=transform).df()

	uid	version	description	key	suffix	accessor	size	hash	hash_type	n_objects	n_observations	visibility	key_is_virtual	storage_id	transform_id	run_id	created_by_id	updated_at
id
1	jYoAn7QVoNLYOZQVvpdO	None	Human immune cells from Conde22	None	.h5ad	AnnData	57612943	9sXda5E7BYiVoDOQkTC0KB	sha1-fl	None	1648	1	True	1	1	1	1	2024-05-29 09:58:16.608457+00:00

Query artifacts by biological metadata¶

organism = bt.Organism.lookup()
tissues = bt.Tissue.lookup()

query = ln.Artifact.filter(
    organisms=organism.human,
    tissues=tissues.bone_marrow,
)

query.df()

	uid	version	description	key	suffix	accessor	size	hash	hash_type	n_objects	n_observations	visibility	key_is_virtual	storage_id	transform_id	run_id	created_by_id	updated_at
id

Inspect artifact metadata¶

query_set = ln.Artifact.filter().all()
artifact1, artifact2 = query_set[0], query_set[1]

artifact1.describe()

Artifact(uid='jYoAn7QVoNLYOZQVvpdO', description='Human immune cells from Conde22', suffix='.h5ad', accessor='AnnData', size=57612943, hash='9sXda5E7BYiVoDOQkTC0KB', hash_type='sha1-fl', n_observations=1648, visibility=1, key_is_virtual=True, updated_at='2024-05-29 09:58:16 UTC')
  Provenance
    .created_by = 'testuser1'
    .storage = '/home/runner/work/lamin-usecases/lamin-usecases/docs/test-scrna'
    .transform = 'scRNA-seq'
    .run = '2024-05-29 09:56:21 UTC'
    .input_of = ["'2024-05-29 09:58:22 UTC'"]
  Labels
    .tissues = 'blood', 'thoracic lymph node', 'spleen', 'lung', 'mesenteric lymph node', 'lamina propria', 'liver', 'jejunal epithelium', 'omentum', 'bone marrow'
    .cell_types = 'classical monocyte', 'T follicular helper cell', 'memory B cell', 'alveolar macrophage', 'naive thymus-derived CD4-positive, alpha-beta T cell', 'effector memory CD8-positive, alpha-beta T cell, terminally differentiated', 'alpha-beta T cell', 'CD4-positive helper T cell', 'naive thymus-derived CD8-positive, alpha-beta T cell', 'macrophage'
    .experimental_factors = '10x 3' v3', '10x 5' v2', '10x 5' v1'
    .ulabels = 'D496', '621B', 'A29', 'A36', 'A35', '637C', 'A52', 'A37', 'D503', '640C'
  Features
    'donor' = 'D496', '621B', 'A29', 'A36', 'A35', '637C', 'A52', 'A37', 'D503', '640C'
    'tissue' = 'blood', 'thoracic lymph node', 'spleen', 'lung', 'mesenteric lymph node', 'lamina propria', 'liver', 'jejunal epithelium', 'omentum', 'bone marrow'
    'cell_type' = 'classical monocyte', 'T follicular helper cell', 'memory B cell', 'alveolar macrophage', 'naive thymus-derived CD4-positive, alpha-beta T cell', 'effector memory CD8-positive, alpha-beta T cell, terminally differentiated', 'alpha-beta T cell', 'CD4-positive helper T cell', 'naive thymus-derived CD8-positive, alpha-beta T cell', 'macrophage'
    'assay' = '10x 3' v3', '10x 5' v2', '10x 5' v1'
  Feature sets
    'var' = 'MIR1302-2HG', 'FAM138A', 'OR4F5', 'None', 'OR4F29', 'OR4F16', 'LINC01409', 'FAM87B', 'LINC01128', 'LINC00115', 'FAM41C'
    'obs' = 'donor', 'tissue', 'cell_type', 'assay'

artifact1.view_lineage()

_images/e7ac4b92e99e0e4dc9cf2d310db4f6df8233e56640bd96e4bba2ad8ba8e2d0b6.svg

artifact2.describe()

Artifact(uid='ZJM6YRSVbpt6HX5geETX', description='10x reference adata', suffix='.h5ad', accessor='AnnData', size=857752, hash='PnpU6XI5Fbzwc49XgrgdNg', hash_type='md5', n_observations=70, visibility=1, key_is_virtual=True, updated_at='2024-05-29 09:58:40 UTC')
  Provenance
    .created_by = 'testuser1'
    .storage = '/home/runner/work/lamin-usecases/lamin-usecases/docs/test-scrna'
    .transform = 'Standardize and append a batch of data'
    .run = '2024-05-29 09:58:22 UTC'
  Labels
    .cell_types = 'dendritic cell', 'CD4-positive helper T cell', 'effector memory CD4-positive, alpha-beta T cell, terminally differentiated', 'cytotoxic T cell', 'CD8-positive, CD25-positive, alpha-beta regulatory T cell', 'CD14-positive, CD16-positive monocyte', 'CD16-positive, CD56-dim natural killer cell, human', 'B cell, CD19-positive', 'CD38-positive naive B cell'
  Features
    'cell_type' = 'dendritic cell', 'CD4-positive helper T cell', 'effector memory CD4-positive, alpha-beta T cell, terminally differentiated', 'cytotoxic T cell', 'CD8-positive, CD25-positive, alpha-beta regulatory T cell', 'CD14-positive, CD16-positive monocyte', 'CD16-positive, CD56-dim natural killer cell, human', 'B cell, CD19-positive', 'CD38-positive naive B cell'
  Feature sets
    'var' = 'TLE5', 'S1PR4', 'CD164', 'SMIM24', 'DCAF10', 'RAB13', 'TPM3', 'HES4', 'HAX1', 'ADD3', 'GSTK1', 'GTF3C6', 'SNX2', 'ACAA1', 'MATK', 'ZYX', 'JAML', 'CD3E', 'CD3D', 'EXOG'
    'obs' = 'cell_type'

artifact2.view_lineage()

_images/5863d146d159c731c6db7d8d43a5c38c9ceb9827490723f013c61d0713e52011.svg

Compare features¶

Here we compute shared genes:

artifact1_genes = artifact1.features["var"]
artifact2_genes = artifact2.features["var"]

shared_genes = artifact1_genes & artifact2_genes
len(shared_genes)

shared_genes.list("symbol")[:10]

['HES4',
 'TNFRSF4',
 'SSU72',
 'PARK7',
 'RBP7',
 'SRM',
 'MAD2L2',
 'AGTRAP',
 'TNFRSF1B',
 'EFHD2']

Compare cell types¶

artifact1_celltypes = artifact1.cell_types.all()
artifact2_celltypes = artifact2.cell_types.all()

shared_celltypes = artifact1_celltypes & artifact2_celltypes
shared_celltypes_names = shared_celltypes.list("name")
shared_celltypes_names

['CD4-positive helper T cell',
 'CD16-positive, CD56-dim natural killer cell, human']

Load the individual artifacts¶

We can either load the artifacts into memory with .load() or access them in backed mode through .backed(), which loads their content lazily.

Let’s load them into memory:

adata1 = artifact1.load()
adata2 = artifact2.load()

We can now subset the two loaded AnnData objects to the shared cell types:

adata1_subset = adata1[adata1.obs["cell_type"].isin(shared_celltypes_names)]
adata2_subset = adata2[adata2.obs["cell_type"].isin(shared_celltypes_names)]