{ "cells": [ { "cell_type": "markdown", "id": "5eea1686-99e8-48b9-9e4a-700f64775697", "metadata": {}, "source": [ "# Dealing with Multiple LHE Files\n", "\n", "Oftentimes, you may have many LHE files that were generated using the same (or extremely similar) methods, and you want to combine all of them into one \"sample\" that you can analyze with a single set of analysis code. This can be done easily and quickly by using an intermediate Parquet file, a format which is supported by [awkward](https://awkward-array.org/doc/main/user-guide/how-to-convert-arrow.html)." ] }, { "cell_type": "code", "execution_count": 1, "id": "a9acf232-2a50-4207-8b83-fbd23abfb3b2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\n", " created_by: parquet-cpp-arrow version 19.0.1\n", " num_columns: 164\n", " num_rows: 30000\n", " num_row_groups: 1\n", " format_version: 2.6\n", " serialized_size: 0" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import awkward as ak\n", "\n", "# Use an example LHE file from package scikit-hep-testdata\n", "from skhep_testdata import data_path\n", "\n", "import pylhe\n", "\n", "lhe_file = data_path(\"pylhe-drell-yan-ll-lhe.gz\")\n", "\n", "# Our input files will simply be multiple copies of the same file for the sake of this example,\n", "# but you can imagine doing the same process below with actually different LHE files\n", "list_of_input_files = [lhe_file for _ in range(3)]\n", "\n", "# get arrays for each file\n", "unmerged_arrays = [\n", " pylhe.to_awkward(pylhe.LHEFile.fromfile(f, with_attributes=True).events)\n", " for f in list_of_input_files\n", "]\n", "# merge arrays into single mega-array\n", "array = ak.concatenate(unmerged_arrays)\n", "# store merged array into cache parquet file\n", "ak.to_parquet(array, \"merged.parquet\")\n", "# any analysis code below can retrieve the array using ak.from_parquet('merged.parquet')" ] }, { "cell_type": "markdown", "id": 
"b068205f-c06a-4810-9d1f-bda5a02e13df", "metadata": {}, "source": [ "Now all of the analysis code can use the merged file, which only needs to be regenerated if more files are to be included or the source LHE files change." ] }, { "cell_type": "code", "execution_count": 2, "id": "b1b13ca5-7945-470d-b305-fb38891f0c66", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
[{eventinfo: {nparticles: 4, pid: 1, ...}, weights: {...}, particles: ..., ...},\n",
       " {eventinfo: {nparticles: 5, pid: 1, ...}, weights: {...}, particles: ..., ...},\n",
       " {eventinfo: {nparticles: 5, pid: 1, ...}, weights: {...}, particles: ..., ...},\n",
       " {eventinfo: {nparticles: 4, pid: 1, ...}, weights: {...}, particles: ..., ...},\n",
       " {eventinfo: {nparticles: 4, pid: 1, ...}, weights: {...}, particles: ..., ...},\n",
       " {eventinfo: {nparticles: 4, pid: 1, ...}, weights: {...}, particles: ..., ...},\n",
       " {eventinfo: {nparticles: 5, pid: 1, ...}, weights: {...}, particles: ..., ...},\n",
       " {eventinfo: {nparticles: 4, pid: 1, ...}, weights: {...}, particles: ..., ...},\n",
       " {eventinfo: {nparticles: 5, pid: 1, ...}, weights: {...}, particles: ..., ...},\n",
       " {eventinfo: {nparticles: 4, pid: 1, ...}, weights: {...}, particles: ..., ...},\n",
       " ...,\n",
       " {eventinfo: {nparticles: 4, pid: 1, ...}, weights: {...}, particles: ..., ...},\n",
       " {eventinfo: {nparticles: 4, pid: 1, ...}, weights: {...}, particles: ..., ...},\n",
       " {eventinfo: {nparticles: 4, pid: 1, ...}, weights: {...}, particles: ..., ...},\n",
       " {eventinfo: {nparticles: 4, pid: 1, ...}, weights: {...}, particles: ..., ...},\n",
       " {eventinfo: {nparticles: 5, pid: 1, ...}, weights: {...}, particles: ..., ...},\n",
       " {eventinfo: {nparticles: 4, pid: 1, ...}, weights: {...}, particles: ..., ...},\n",
       " {eventinfo: {nparticles: 4, pid: 1, ...}, weights: {...}, particles: ..., ...},\n",
       " {eventinfo: {nparticles: 4, pid: 1, ...}, weights: {...}, particles: ..., ...},\n",
       " {eventinfo: {nparticles: 5, pid: 1, ...}, weights: {...}, particles: ..., ...}]\n",
       "------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\
n",
       "backend: cpu\n",
       "nbytes: 50.2 MB\n",
       "type: 30000 * Event[\n",
       "    eventinfo: EventInfo[\n",
       "        nparticles: float64,\n",
       "        pid: float64,\n",
       "        weight: float64,\n",
       "        scale: float64,\n",
       "        aqed: float64,\n",
       "        aqcd: float64\n",
       "    ],\n",
       "    weights: Weights[\n",
       "        "1": float64,\n",
       "        "2": float64,\n",
       "        "3": float64,\n",
       "        "4": float64,\n",
       "        "5": float64,\n",
       "        "6": float64,\n",
       "        "7": float64,\n",
       "        "8": float64,\n",
       "        "9": float64,\n",
       "        "10": float64,\n",
       "        "11": float64,\n",
       "        "12": float64,\n",
       "        "13": float64,\n",
       "        "14": float64,\n",
       "        "15": float64,\n",
       "        "16": float64,\n",
       "        "17": float64,\n",
       "        "18": float64,\n",
       "        "19": float64,\n",
       "        "20": float64,\n",
       "        "21": float64,\n",
       "        "22": float64,\n",
       "        "23": float64,\n",
       "        "24": float64,\n",
       "        "25": float64,\n",
       "        "26": float64,\n",
       "        "27": float64,\n",
       "        "28": float64,\n",
       "        "29": float64,\n",
       "        "30": float64,\n",
       "        "31": float64,\n",
       "        "32": float64,\n",
       "        "33": float64,\n",
       "        "34": float64,\n",
       "        "35": float64,\n",
       "        "36": float64,\n",
       "        "37": float64,\n",
       "        "38": float64,\n",
       "        "39": float64,\n",
       "        "40": float64,\n",
       "        "41": float64,\n",
       "        "42": float64,\n",
       "        "43": float64,\n",
       "        "44": float64,\n",
       "        "45": float64,\n",
       "        "46": float64,\n",
       "        "47": float64,\n",
       "        "48": float64,\n",
       "        "49": float64,\n",
       "        "50": float64,\n",
       "        "51": float64,\n",
       "        "52": float64,\n",
       "        "53": float64,\n",
       "        "54": float64,\n",
       "        "55": float64,\n",
       "        "56": float64,\n",
       "        "57": float64,\n",
       "        "58": float64,\n",
       "        "59": float64,\n",
       "        "60": float64,\n",
       "        "61": float64,\n",
       "        "62": float64,\n",
       "        "63": float64,\n",
       "        "64": float64,\n",
       "        "65": float64,\n",
       "        "66": float64,\n",
       "        "67": float64,\n",
       "        "68": float64,\n",
       "        "69": float64,\n",
       "        "70": float64,\n",
       "        "71": float64,\n",
       "        "72": float64,\n",
       "        "73": float64,\n",
       "        "74": float64,\n",
       "        "75": float64,\n",
       "        "76": float64,\n",
       "        "77": float64,\n",
       "        "78": float64,\n",
       "        "79": float64,\n",
       "        "80": float64,\n",
       "        "81": float64,\n",
       "        "82": float64,\n",
       "        "83": float64,\n",
       "        "84": float64,\n",
       "        "85": float64,\n",
       "        "86": float64,\n",
       "        "87": float64,\n",
       "        "88": float64,\n",
       "        "89": float64,\n",
       "        "90": float64,\n",
       "        "91": float64,\n",
       "        "92": float64,\n",
       "        "93": float64,\n",
       "        "94": float64,\n",
       "        "95": float64,\n",
       "        "96": float64,\n",
       "        "97": float64,\n",
       "        "98": float64,\n",
       "        "99": float64,\n",
       "        "100": float64,\n",
       "        "101": float64,\n",
       "        "102": float64,\n",
       "        "103": float64,\n",
       "        "104": float64,\n",
       "        "105": float64,\n",
       "        "106": float64,\n",
       "        "107": float64,\n",
       "        "108": float64,\n",
       "        "109": float64,\n",
       "        "110": float64,\n",
       "        "111": float64,\n",
       "        "112": float64,\n",
       "        "113": float64,\n",
       "        "114": float64,\n",
       "        "115": float64,\n",
       "        "116": float64,\n",
       "        "117": float64,\n",
       "        "118": float64,\n",
       "        "119": float64,\n",
       "        "120": float64,\n",
       "        "121": float64,\n",
       "        "122": float64,\n",
       "        "123": float64,\n",
       "        "124": float64,\n",
       "        "125": float64,\n",
       "        "126": float64,\n",
       "        "127": float64,\n",
       "        "128": float64,\n",
       "        "129": float64,\n",
       "        "130": float64,\n",
       "        "131": float64,\n",
       "        "132": float64,\n",
       "        "133": float64,\n",
       "        "134": float64,\n",
       "        "135": float64,\n",
       "        "136": float64,\n",
       "        "137": float64,\n",
       "        "138": float64,\n",
       "        "139": float64,\n",
       "        "140": float64,\n",
       "        "141": float64,\n",
       "        "142": float64,\n",
       "        "143": float64,\n",
       "        "144": float64,\n",
       "        "145": float64\n",
       "    ],\n",
       "    particles: var * Particle[\n",
       "        vector: Momentum4D[\n",
       "            px: float64,\n",
       "            py: float64,\n",
       "            pz: float64,\n",
       "            e: float64\n",
       "        ],\n",
       "        id: float64,\n",
       "        status: float64,\n",
       "        mother1: float64,\n",
       "        mother2: float64,\n",
       "        color1: float64,\n",
       "        color2: float64,\n",
       "        m: float64,\n",
       "        lifetime: float64,\n",
       "        spin: float64\n",
       "    ]\n",
       "]
" ], "text/plain": [ "" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ak.from_parquet(\"merged.parquet\")" ] }, { "cell_type": "code", "execution_count": null, "id": "e3daa456-460a-43a8-90df-518cc417fb6b", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.5" } }, "nbformat": 4, "nbformat_minor": 5 }