|
45 | 45 | },
|
46 | 46 | "outputs": [
|
47 | 47 | {
|
48 |
| - "name": "stderr", |
49 |
| - "output_type": "stream", |
50 |
| - "text": [ |
51 |
| - "Processing OWL elements: 100%|██████████| 58028/58028 [00:04<00:00, 14307.11it/s]\n" |
52 |
| - ] |
| 48 | + "data": { |
| 49 | + "application/vnd.jupyter.widget-view+json": { |
| 50 | + "model_id": "8d790577d1ef40a2b9db30f186e3a721", |
| 51 | + "version_major": 2, |
| 52 | + "version_minor": 0 |
| 53 | + }, |
| 54 | + "text/plain": [ |
| 55 | + "Processing OWL elements: 0%| | 0.00/58.0k [00:00<?, ?it/s]" |
| 56 | + ] |
| 57 | + }, |
| 58 | + "metadata": {}, |
| 59 | + "output_type": "display_data" |
53 | 60 | }
|
54 | 61 | ],
|
55 | 62 | "source": [
|
56 | 63 | "import os\n",
|
57 |
| - "local_owl_file = os.path.join(os.path.abspath(''), os.pardir,\n", |
58 |
| - " 'pybiopax', 'tests', 'biopax_test.owl')\n", |
| 64 | + "import pybiopax\n", |
| 65 | + "local_owl_file = os.path.join(pybiopax.__path__[0], 'tests', 'biopax_test.owl')\n", |
59 | 66 | "model = pybiopax.model_from_owl_file(local_owl_file)"
|
60 | 67 | ]
|
61 | 68 | },
|
|
87 | 94 | "metadata": {},
|
88 | 95 | "outputs": [
|
89 | 96 | {
|
90 |
| - "name": "stderr", |
91 |
| - "output_type": "stream", |
92 |
| - "text": [ |
93 |
| - "Processing OWL elements: 100%|██████████| 58028/58028 [00:03<00:00, 14999.51it/s]\n" |
94 |
| - ] |
| 97 | + "data": { |
| 98 | + "application/vnd.jupyter.widget-view+json": { |
| 99 | + "model_id": "23d1cfa031b04847bd635e6dfd085259", |
| 100 | + "version_major": 2, |
| 101 | + "version_minor": 0 |
| 102 | + }, |
| 103 | + "text/plain": [ |
| 104 | + "Processing OWL elements: 0%| | 0.00/58.0k [00:00<?, ?it/s]" |
| 105 | + ] |
| 106 | + }, |
| 107 | + "metadata": {}, |
| 108 | + "output_type": "display_data" |
95 | 109 | },
|
96 | 110 | {
|
97 | 111 | "data": {
|
98 | 112 | "text/plain": [
|
99 |
| - "<pybiopax.biopax.model.BioPaxModel at 0x1180410d0>" |
| 113 | + "<pybiopax.biopax.model.BioPaxModel at 0x123f08c40>" |
100 | 114 | ]
|
101 | 115 | },
|
102 | 116 | "execution_count": 3,
|
|
140 | 154 | "metadata": {},
|
141 | 155 | "outputs": [
|
142 | 156 | {
|
143 |
| - "name": "stderr", |
144 |
| - "output_type": "stream", |
145 |
| - "text": [ |
146 |
| - "Processing OWL elements: 100%|██████████| 63758/63758 [00:05<00:00, 12532.78it/s]\n" |
147 |
| - ] |
| 157 | + "data": { |
| 158 | + "application/vnd.jupyter.widget-view+json": { |
| 159 | + "model_id": "8729f695fed34f2abad116d5e67a8ea7", |
| 160 | + "version_major": 2, |
| 161 | + "version_minor": 0 |
| 162 | + }, |
| 163 | + "text/plain": [ |
| 164 | + "Processing OWL elements: 0%| | 0.00/63.8k [00:00<?, ?it/s]" |
| 165 | + ] |
| 166 | + }, |
| 167 | + "metadata": {}, |
| 168 | + "output_type": "display_data" |
148 | 169 | }
|
149 | 170 | ],
|
150 | 171 | "source": [
|
|
322 | 343 | },
|
323 | 344 | {
|
324 | 345 | "cell_type": "code",
|
325 |
| - "execution_count": 9, |
| 346 | + "execution_count": null, |
326 | 347 | "metadata": {},
|
327 | 348 | "outputs": [
|
328 | 349 | {
|
329 |
| - "name": "stderr", |
330 |
| - "output_type": "stream", |
331 |
| - "text": [ |
332 |
| - "Processing OWL elements: 100%|██████████| 215406/215406 [00:17<00:00, 12426.22it/s]\n" |
333 |
| - ] |
| 350 | + "data": { |
| 351 | + "application/vnd.jupyter.widget-view+json": { |
| 352 | + "model_id": "6ffded4ba63f4eff866469145338066b", |
| 353 | + "version_major": 2, |
| 354 | + "version_minor": 0 |
| 355 | + }, |
| 356 | + "text/plain": [ |
| 357 | + "Processing OWL elements: 0%| | 0.00/215k [00:00<?, ?it/s]" |
| 358 | + ] |
| 359 | + }, |
| 360 | + "metadata": {}, |
| 361 | + "output_type": "display_data" |
334 | 362 | }
|
335 | 363 | ],
|
336 | 364 | "source": [
|
|
340 | 368 | },
|
341 | 369 | {
|
342 | 370 | "cell_type": "code",
|
343 |
| - "execution_count": 10, |
| 371 | + "execution_count": null, |
344 | 372 | "metadata": {},
|
345 | 373 | "outputs": [],
|
346 | 374 | "source": [
|
|
377 | 405 | },
|
378 | 406 | {
|
379 | 407 | "cell_type": "code",
|
380 |
| - "execution_count": 11, |
| 408 | + "execution_count": null, |
381 | 409 | "metadata": {},
|
382 |
| - "outputs": [ |
383 |
| - { |
384 |
| - "name": "stdout", |
385 |
| - "output_type": "stream", |
386 |
| - "text": [ |
387 |
| - "EGFR_HUMAN 229, 654, 678, 693, 727, 764, 768, 845, 869, 892, 915, 925, 944, 991, 992, 998, 1016, 1026, 1039, 1041, 1045, 1046, 1064, 1068, 1069, 1070, 1071, 1081, 1085, 1086, 1092, 1096, 1101, 1104, 1110, 1120, 1125, 1138, 1148, 1166, 1172, 1173, 1197\n", |
388 |
| - "FGFR2_HUMAN 464, 465, 466, 467, 584, 585, 586, 587, 588, 589, 654, 655, 656, 657, 658, 731, 732, 733, 734, 767, 768, 769, 770, 778, 779, 780\n", |
389 |
| - "GAB1_HUMAN 285, 373, 406, 446, 447, 472, 527, 589, 619, 627, 657, 659, 689\n", |
390 |
| - "JAK2_HUMAN 119, 221, 372, 373, 570, 637, 813, 868, 966, 972, 1007, 1008\n", |
391 |
| - "FGFR3_HUMAN 577, 579, 647, 648, 649, 650, 724, 726, 760, 762, 770, 772\n" |
392 |
| - ] |
393 |
| - } |
394 |
| - ], |
| 410 | + "outputs": [], |
395 | 411 | "source": [
|
396 | 412 | "for protein, sites in sorted(phosphosites.items(),\n",
|
397 | 413 | " key=lambda x: len(x[1]),\n",
|
|
419 | 435 | },
|
420 | 436 | {
|
421 | 437 | "cell_type": "code",
|
422 |
| - "execution_count": 12, |
| 438 | + "execution_count": null, |
423 | 439 | "metadata": {},
|
424 |
| - "outputs": [ |
425 |
| - { |
426 |
| - "name": "stderr", |
427 |
| - "output_type": "stream", |
428 |
| - "text": [ |
429 |
| - "Serializing OWL elements: 100%|██████████| 215405/215405 [00:17<00:00, 11974.05it/s]\n" |
430 |
| - ] |
431 |
| - } |
432 |
| - ], |
| 440 | + "outputs": [], |
433 | 441 | "source": [
|
434 | 442 | "pybiopax.model_to_owl_file(model, 'model.owl')"
|
435 | 443 | ]
|
436 | 444 | },
|
437 | 445 | {
|
438 | 446 | "cell_type": "code",
|
439 |
| - "execution_count": 13, |
| 447 | + "execution_count": null, |
440 | 448 | "metadata": {},
|
441 |
| - "outputs": [ |
442 |
| - { |
443 |
| - "name": "stdout", |
444 |
| - "output_type": "stream", |
445 |
| - "text": [ |
446 |
| - "<?xml version='1.0' encoding='utf-8'?>\n", |
447 |
| - "<rdf:RDF xmlns:xsd=\"http://www.w3.org/2001/XMLSchema#\" xmlns:owl=\"http://www.w3.org/2002/07/owl#\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" xmlns:bp=\"http://www.biopax.org/release/biopax-level3.owl#\" xml:base=\"http://pathwaycommons.org/pc12/\">\n", |
448 |
| - " <owl:Ontology rdf:about=\"\">\n", |
449 |
| - " <owl:imports rdf:resource=\"http://www.biopax.org/release/biopax-level3.owl#\"/>\n", |
450 |
| - " </owl:Ontology>\n", |
451 |
| - "\n", |
452 |
| - "<bp:SequenceSite rdf:ID=\"SequenceSite_bfd0338a029352b8e0668f3eb0f6bbac\">\n", |
453 |
| - " <bp:positionStatus rdf:datatype=\"http://www.w3.org/2001/XMLSchema#string\">EQUAL</bp:positionStatus>\n", |
454 |
| - " <bp:sequencePosition rdf:datatype=\"http://www.w3.org/2001/XMLSchema#int\">272</bp:sequencePosition>\n", |
455 |
| - "</bp:SequenceSite>\n", |
456 |
| - "\n", |
457 |
| - "<bp:SequenceSite rdf:ID=\"SequenceSite_14f64bc4cd23eca47d429e39069e06c7\">\n", |
458 |
| - " <bp:positionStatus rdf:datatype=\"http://www.w3.org/2001/XMLSchema#string\">EQUAL</bp:positionStatus>\n", |
459 |
| - " <bp:sequencePosition rdf:datatype=\"http://www.w3.org/2001/XMLSchema#int\">613</bp:sequencePosition>\n", |
460 |
| - "</bp:SequenceSite>\n", |
461 |
| - "\n", |
462 |
| - "<bp:ModificationFeature rdf:ID=\"ModificationFeature_359d393d7d605f94ebb3e1853275973f\">\n", |
463 |
| - " <bp:comment rdf:datatype=\"http://www.w3.org/2001/XMLSchema#string\">MOD_RES 33 33 Phosphothreonine.</bp:comment>\n", |
464 |
| - " <bp:featureLocation rdf:resource=\"#SequenceSite_2bfb5009a4f06e0205c03bfbb9422c1c\"/>\n", |
465 |
| - " <bp:modificationType rdf:resource=\"#SequenceModificationVocabulary_51783aaefc798a70971be2e9fcea2d6e\"/>\n", |
466 |
| - "</bp:ModificationFeature>\n", |
467 |
| - "\n", |
468 |
| - "<bp:SequenceSite rdf:ID=\"SequenceSite_97fa778b0824ac49fa0540c412f7df5b\">\n", |
469 |
| - " <bp:positionStatus rdf:datatype=\"http://www.w3.org/2001/XMLSchema#string\">EQUAL</bp:positionStatus>\n", |
470 |
| - " <bp:sequencePosition rdf:datatype=\"http://www.w3.org/2001/XMLSchema#int\">600</bp:sequencePosition>\n", |
471 |
| - "\n" |
472 |
| - ] |
473 |
| - } |
474 |
| - ], |
| 449 | + "outputs": [], |
475 | 450 | "source": [
|
476 | 451 | "with open('model.owl', 'r') as fh:\n",
|
477 | 452 | " head = ''.join([fh.readline() for _ in range(25)])\n",
|
478 | 453 | " print(head)"
|
479 | 454 | ]
|
| 455 | + }, |
| 456 | + { |
| 457 | + "cell_type": "markdown", |
| 458 | + "metadata": {}, |
| 459 | + "source": [ |
| 460 | + "## Using path constraint strings to traverse a model\n", |
| 461 | + "Using the `pybiopax.paths` module we can find lists of objects that satisfy a path constraint starting from a given object." |
| 462 | + ] |
| 463 | + }, |
| 464 | + { |
| 465 | + "cell_type": "code", |
| 466 | + "execution_count": null, |
| 467 | + "metadata": {}, |
| 468 | + "outputs": [], |
| 469 | + "source": [ |
| 470 | + "from pybiopax.paths import find_objects" |
| 471 | + ] |
| 472 | + }, |
| 473 | + { |
| 474 | + "cell_type": "markdown", |
| 475 | + "metadata": {}, |
| 476 | + "source": [ |
| 477 | + "Let's start with a modification feature object as the starting point for traversal." |
| 478 | + ] |
| 479 | + }, |
| 480 | + { |
| 481 | + "cell_type": "code", |
| 482 | + "execution_count": null, |
| 483 | + "metadata": {}, |
| 484 | + "outputs": [], |
| 485 | + "source": [ |
| 486 | + "modf = model.objects['ModificationFeature_23b018cf79493a97029da4309b044958']\n", |
| 487 | + "modf" |
| 488 | + ] |
| 489 | + }, |
| 490 | + { |
| 491 | + "cell_type": "markdown", |
| 492 | + "metadata": {}, |
| 493 | + "source": [ |
| 494 | + "Let's now link to the entities that this modification feature belongs to using the `feature_of` link. This returns a list of physical entities; in this example, there is just one." |
| 495 | + ] |
| 496 | + }, |
| 497 | + { |
| 498 | + "cell_type": "code", |
| 499 | + "execution_count": null, |
| 500 | + "metadata": { |
| 501 | + "scrolled": true |
| 502 | + }, |
| 503 | + "outputs": [], |
| 504 | + "source": [ |
| 505 | + "find_objects(modf, 'feature_of')" |
| 506 | + ] |
| 507 | + }, |
| 508 | + { |
| 509 | + "cell_type": "markdown", |
| 510 | + "metadata": {}, |
| 511 | + "source": [ |
| 512 | + "We can extend the path constraint to then link forward to all other features of the entities that this is a feature of using the `feature` link. We additionally set a class constraint `ModificationFeature` to return only `ModificationFeature`s. Finally, we link to the type of the modification feature through the `modification_type` link." |
| 513 | + ] |
| 514 | + }, |
| 515 | + { |
| 516 | + "cell_type": "code", |
| 517 | + "execution_count": null, |
| 518 | + "metadata": {}, |
| 519 | + "outputs": [], |
| 520 | + "source": [ |
| 521 | + "find_objects(modf, 'feature_of/feature:ModificationFeature/modification_type')" |
| 522 | + ] |
480 | 523 | }
|
481 | 524 | ],
|
482 | 525 | "metadata": {
|
|
495 | 538 | "name": "python",
|
496 | 539 | "nbconvert_exporter": "python",
|
497 | 540 | "pygments_lexer": "ipython3",
|
498 |
| - "version": "3.9.6" |
| 541 | + "version": "3.9.7" |
499 | 542 | }
|
500 | 543 | },
|
501 | 544 | "nbformat": 4,
|
|
0 commit comments