Query artifacts¶
Here, we’ll query artifacts and inspect their metadata.
This guide can be skipped if you are only interested in how to leverage the overall collection.
import lamindb as ln
import bionty as bt
💡 connected lamindb: testuser1/test-scrna
ln.settings.transform.stem_uid = "agayZTonayqA"
ln.settings.transform.version = "1"
ln.track()
💡 notebook imports: bionty==0.43.1 lamindb==0.72.1
💡 saved: Transform(uid='agayZTonayqA5zKv', version='1', name='Query artifacts', key='scrna3', type='notebook', created_by_id=1, updated_at='2024-05-29 09:58:47 UTC')
💡 saved: Run(uid='YaiyFV65P7CGt85tZiOs', transform_id=3, created_by_id=1)
Run(uid='YaiyFV65P7CGt85tZiOs', started_at='2024-05-29 09:58:47 UTC', is_consecutive=True, transform_id=3, created_by_id=1)
Query artifacts by provenance metadata¶
users = ln.User.lookup()
ln.Transform.filter(created_by=users.testuser1).search("scrna").df()
uid | version | name | key | description | type | reference | reference_type | latest_report_id | source_code_id | created_by_id | updated_at | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
id | ||||||||||||
1 | Nv48yAceNSh85zKv | 1 | scRNA-seq | scrna | None | notebook | None | None | None | None | 1 | 2024-05-29 09:56:21.663230+00:00 |
2 | ManDYgmftZ8C5zKv | 1 | Standardize and append a batch of data | scrna2 | None | notebook | None | None | None | None | 1 | 2024-05-29 09:58:22.270273+00:00 |
3 | agayZTonayqA5zKv | 1 | Query artifacts | scrna3 | None | notebook | None | None | None | None | 1 | 2024-05-29 09:58:47.794891+00:00 |
transform = ln.Transform.filter(uid="Nv48yAceNSh85zKv").one()
ln.Artifact.filter(transform=transform).df()
uid | version | description | key | suffix | accessor | size | hash | hash_type | n_objects | n_observations | visibility | key_is_virtual | storage_id | transform_id | run_id | created_by_id | updated_at | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
id | ||||||||||||||||||
1 | jYoAn7QVoNLYOZQVvpdO | None | Human immune cells from Conde22 | None | .h5ad | AnnData | 57612943 | 9sXda5E7BYiVoDOQkTC0KB | sha1-fl | None | 1648 | 1 | True | 1 | 1 | 1 | 1 | 2024-05-29 09:58:16.608457+00:00 |
Query artifacts by biological metadata¶
organism = bt.Organism.lookup()
tissues = bt.Tissue.lookup()
query = ln.Artifact.filter(
organisms=organism.human,
tissues=tissues.bone_marrow,
)
query.df()
uid | version | description | key | suffix | accessor | size | hash | hash_type | n_objects | n_observations | visibility | key_is_virtual | storage_id | transform_id | run_id | created_by_id | updated_at | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
id |
Inspect artifact metadata¶
query_set = ln.Artifact.filter().all()
artifact1, artifact2 = query_set[0], query_set[1]
artifact1.describe()
Artifact(uid='jYoAn7QVoNLYOZQVvpdO', description='Human immune cells from Conde22', suffix='.h5ad', accessor='AnnData', size=57612943, hash='9sXda5E7BYiVoDOQkTC0KB', hash_type='sha1-fl', n_observations=1648, visibility=1, key_is_virtual=True, updated_at='2024-05-29 09:58:16 UTC')
Provenance
.created_by = 'testuser1'
.storage = '/home/runner/work/lamin-usecases/lamin-usecases/docs/test-scrna'
.transform = 'scRNA-seq'
.run = '2024-05-29 09:56:21 UTC'
.input_of = ["'2024-05-29 09:58:22 UTC'"]
Labels
.tissues = 'blood', 'thoracic lymph node', 'spleen', 'lung', 'mesenteric lymph node', 'lamina propria', 'liver', 'jejunal epithelium', 'omentum', 'bone marrow'
.cell_types = 'classical monocyte', 'T follicular helper cell', 'memory B cell', 'alveolar macrophage', 'naive thymus-derived CD4-positive, alpha-beta T cell', 'effector memory CD8-positive, alpha-beta T cell, terminally differentiated', 'alpha-beta T cell', 'CD4-positive helper T cell', 'naive thymus-derived CD8-positive, alpha-beta T cell', 'macrophage'
.experimental_factors = '10x 3' v3', '10x 5' v2', '10x 5' v1'
.ulabels = 'D496', '621B', 'A29', 'A36', 'A35', '637C', 'A52', 'A37', 'D503', '640C'
Features
'donor' = 'D496', '621B', 'A29', 'A36', 'A35', '637C', 'A52', 'A37', 'D503', '640C'
'tissue' = 'blood', 'thoracic lymph node', 'spleen', 'lung', 'mesenteric lymph node', 'lamina propria', 'liver', 'jejunal epithelium', 'omentum', 'bone marrow'
'cell_type' = 'classical monocyte', 'T follicular helper cell', 'memory B cell', 'alveolar macrophage', 'naive thymus-derived CD4-positive, alpha-beta T cell', 'effector memory CD8-positive, alpha-beta T cell, terminally differentiated', 'alpha-beta T cell', 'CD4-positive helper T cell', 'naive thymus-derived CD8-positive, alpha-beta T cell', 'macrophage'
'assay' = '10x 3' v3', '10x 5' v2', '10x 5' v1'
Feature sets
'var' = 'MIR1302-2HG', 'FAM138A', 'OR4F5', 'None', 'OR4F29', 'OR4F16', 'LINC01409', 'FAM87B', 'LINC01128', 'LINC00115', 'FAM41C'
'obs' = 'donor', 'tissue', 'cell_type', 'assay'
artifact2.describe()
Artifact(uid='ZJM6YRSVbpt6HX5geETX', description='10x reference adata', suffix='.h5ad', accessor='AnnData', size=857752, hash='PnpU6XI5Fbzwc49XgrgdNg', hash_type='md5', n_observations=70, visibility=1, key_is_virtual=True, updated_at='2024-05-29 09:58:40 UTC')
Provenance
.created_by = 'testuser1'
.storage = '/home/runner/work/lamin-usecases/lamin-usecases/docs/test-scrna'
.transform = 'Standardize and append a batch of data'
.run = '2024-05-29 09:58:22 UTC'
Labels
.cell_types = 'dendritic cell', 'CD4-positive helper T cell', 'effector memory CD4-positive, alpha-beta T cell, terminally differentiated', 'cytotoxic T cell', 'CD8-positive, CD25-positive, alpha-beta regulatory T cell', 'CD14-positive, CD16-positive monocyte', 'CD16-positive, CD56-dim natural killer cell, human', 'B cell, CD19-positive', 'CD38-positive naive B cell'
Features
'cell_type' = 'dendritic cell', 'CD4-positive helper T cell', 'effector memory CD4-positive, alpha-beta T cell, terminally differentiated', 'cytotoxic T cell', 'CD8-positive, CD25-positive, alpha-beta regulatory T cell', 'CD14-positive, CD16-positive monocyte', 'CD16-positive, CD56-dim natural killer cell, human', 'B cell, CD19-positive', 'CD38-positive naive B cell'
Feature sets
'var' = 'TLE5', 'S1PR4', 'CD164', 'SMIM24', 'DCAF10', 'RAB13', 'TPM3', 'HES4', 'HAX1', 'ADD3', 'GSTK1', 'GTF3C6', 'SNX2', 'ACAA1', 'MATK', 'ZYX', 'JAML', 'CD3E', 'CD3D', 'EXOG'
'obs' = 'cell_type'
Compare features¶
Here we compute shared genes:
artifact1_genes = artifact1.features["var"]
artifact2_genes = artifact2.features["var"]
shared_genes = artifact1_genes & artifact2_genes
len(shared_genes)
749
shared_genes.list("symbol")[:10]
['HES4',
'TNFRSF4',
'SSU72',
'PARK7',
'RBP7',
'SRM',
'MAD2L2',
'AGTRAP',
'TNFRSF1B',
'EFHD2']
Compare cell types¶
artifact1_celltypes = artifact1.cell_types.all()
artifact2_celltypes = artifact2.cell_types.all()
shared_celltypes = artifact1_celltypes & artifact2_celltypes
shared_celltypes_names = shared_celltypes.list("name")
shared_celltypes_names
['CD4-positive helper T cell',
'CD16-positive, CD56-dim natural killer cell, human']
Load the individual artifacts¶
We could either load the artifacts into memory or access them in backed
mode through .backed()
to lazily load their content.
Let’s load them into memory:
adata1 = artifact1.load()
adata2 = artifact2.load()
We can now subset the two datasets by shared cell types:
adata1_subset = adata1[adata1.obs["cell_type"].isin(shared_celltypes_names)]
adata2_subset = adata2[adata2.obs["cell_type"].isin(shared_celltypes_names)]