
"""Implements JSON pasers, summarizing the raw QPU output files.

USAGE:
    | python -m post_processing.logparser summarize_dwave ./run_logs/dwave   ./run_logs/summaries/dwave_summary.csv   ./run_logs/summaries/dwave_stats.csv
    |
    | python -m post_processing.logparser summarize_ibm   ./run_logs/ibm-qpu ./run_logs/summaries/ibm-qpu_summary.csv ./run_logs/summaries/ibm-qpu_stats.csv "IBM-QPU"
    |
    | python -m post_processing.logparser summarize_quera ./run_logs/quera   ./run_logs/summaries/quera_summary.csv   ./run_logs/summaries/quera_stats.csv
    |
    | python -m post_processing.logparser extract_samples dwave ./run_logs/dwave/<instance>.json ./run_logs/dwave/samples-csv/<instance>.sample.csv ./instances/QUBO ./instances/orig

The module implements the abstract parser class defining a universal interface,
:py:class:`post_processing.logparser.QPULogParser`, and three derived classes
with device-specific code. Most notably, each of these implements the
method ``extract_samples``, used to extract the data regarding individual QPU
shots; this allows a solution to be recovered, which is then used to calculate
the objective values with the helper functions
:py:func:`calculate_QUBO_objective` and :py:func:`calculate_orig_objective`.

When run from the command line as presented above, the function specified in
the first argument is executed. To this end, the log parser interface is
wrapped into several functions accessible from the command line.

"""
import json
import pandas as pd
from glob import glob
import os
import numpy as np

from sys import argv
from abc import abstractmethod
from ast import literal_eval as make_tuple

from TSP_inst import load_instance_by_ID as load_TSP_by_ID
from TSP_inst import unpack_tour_QUBO_bitstring, obj_val
from TSP_inst import is_feasible as TSP_is_feasible

from MWC_inst import load_instance_by_ID as load_MWC_by_ID
from MWC_inst import get_objective_from_sol as get_MWC_objective

from UDMWIS_inst import load_instance_by_ID as load_UDMWIS_by_ID
from UDMWIS_inst import get_objective_from_sol as get_UDMWIS_objective

from MIS_inst import is_IS, extract_G_from_json
from MIS_inst import load_instance_by_ID as load_UDMIS_by_ID

from qubo_utils import get_instance_size_by_ID, get_QUBO_by_ID, load_QUBO
from qubo_utils import instance_present_in_folder

######################################################################
# Helper functions

def get_inst_type(inst_id):
    """Helper: Extracts the instance type from ``inst_id``."""
    if inst_id[:3] == "TSP":
        return "TSP"
    elif inst_id[:3] == "MWC":
        return "MWC"
    elif inst_id[:6] == "UDMWIS":
        return "UDMWIS"
    elif inst_id[:5] == "UDMIS":
        return "UDMIS"
    else:
        raise ValueError(f"{inst_id}: wrong ID; unexpected instance type marker (TSP/MWC/UDMWIS/UDMIS expected).")

def calculate_QUBO_objective(inst_id, top_samples,
                             orig_dir="./instances/orig",
                             qubo_dir="./instances/QUBO"):
    """Helper: Returns the QUBO objective and the feasibility flag from a collection of samples.

    Calculates the objective values using the QUBO machinery and assesses
    the feasibility of each respective solution.

    Args:
        inst_id(str): instance id,
        top_samples(list): best samples to choose from (list of bitstrings),
        orig_dir(str): directory with original instance JSONs,
        qubo_dir(str): directory with QUBO instance JSONs.

    Returns:
        A tuple of ``(obj, feas)``, where ``obj`` is the best (minimum)
        feasible QUBO objective value (just the minimum if no feasible
        solutions are found among ``top_samples``) and ``feas`` = ``True``
        if it is feasible, and ``False`` otherwise.

    Notes:
        - ``top_samples`` must contain a list of bitstrings, where
          higher-ranking qubits are on the left: e.g., ``[b3, b2, b1, b0]``.
    """
    filename = get_QUBO_by_ID(inst_id, folder=qubo_dir)
    Q, P, C, qubojs = load_QUBO(filename)
    int_samples = [np.array([int(s) for s in reversed(x)])
                   for x in top_samples]
    QUBO_objectives = [(0.5 * x @ Q @ x + P @ x + C) for x in int_samples]

    match get_inst_type(inst_id):
        case "TSP":
            D, _ = load_TSP_by_ID(inst_id, directory=orig_dir)
            tours = [unpack_tour_QUBO_bitstring([s for s in reversed(sol)],
                                                len(D))
                     for sol in top_samples]
            feasible = [TSP_is_feasible(tour, D) for tour in tours]
        case "MWC":
            G = load_MWC_by_ID(inst_id, directory=orig_dir)
            feasible = [True for _ in top_samples]
        case "UDMIS":
            G, js = load_UDMIS_by_ID(inst_id, directory=orig_dir)
            # Note that this order of nodes is correct
            # because ``int_samples`` is already reversed above
            # (as compared to ``top_samples``)
            feasible = [is_IS(G, sol) for sol in int_samples]
        case _:
            raise ValueError(f"{inst_id}: wrong instance ID (get_inst_type returned type {get_inst_type(inst_id)})")

    feasible_objs = [QUBO_objectives[i] for i in range(len(QUBO_objectives))
                     if feasible[i]]

    if len(feasible_objs) == 0:
        return np.min(QUBO_objectives), False
    else:
        return np.min(feasible_objs), True

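
# A minimal sketch of the bit-order convention used above: the leftmost
# character of a bitstring is the highest-ranking qubit, so the string is
# reversed before being applied to Q, P, and C. The tiny QUBO below is
# hypothetical, for illustration only.

def _demo_qubo_objective_convention():
    """Sketch: evaluates ``0.5 * x @ Q @ x + P @ x + C`` for a hand-made
    two-variable QUBO, mirroring :py:func:`calculate_QUBO_objective`."""
    Q = np.array([[0, 2], [2, 0]])
    P = np.array([-1, -1])
    C = 0.0
    bitstring = "10"  # b1=1, b0=0 (highest-ranking qubit on the left)
    x = np.array([int(s) for s in reversed(bitstring)])  # -> [0, 1]
    return 0.5 * x @ Q @ x + P @ x + C  # = -1.0
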
def calculate_orig_objective(inst_id, top_samples,
                             orig_dir="./instances/orig"):
    """Helper: Calculates the objective given the instance ID and the collection of bitstrings.

    Uses the logic of the original problem, not the universal QUBO code.

    Args:
        inst_id(str): instance id,
        top_samples(list): best samples to choose from (bitstrings).

    Notes:
        - ``top_samples`` must contain a list of bitstrings, where
          higher-ranking qubits are on the left: e.g., ``[b3, b2, b1, b0]``.
    """
    match get_inst_type(inst_id):
        case "TSP":
            D, _ = load_TSP_by_ID(inst_id, directory=orig_dir)
            objs = [obj_val(D, unpack_tour_QUBO_bitstring(
                        [s for s in reversed(sol)], len(D)))
                    for sol in top_samples if sol is not None]
            objs = [obj for obj in objs if obj is not None]
            if len(objs) > 0:
                return min(objs)
            else:
                return None
        case "MWC":
            G = load_MWC_by_ID(inst_id, directory=orig_dir)
            return max([get_MWC_objective(G, [s for s in reversed(sol)])
                        for sol in top_samples])
        case "UDMIS":
            G, _ = load_UDMIS_by_ID(inst_id, directory=orig_dir)
            int_sols = [[int(s) for s in reversed(sol)]
                        for sol in top_samples]
            objs = [sum(int_sol) for int_sol in int_sols
                    if is_IS(G, int_sol)]
            if len(objs) > 0:
                return max(objs)
            else:
                return None
        case _:
            raise ValueError(f"{inst_id}: unknown instance type '{get_inst_type(inst_id)}'.")

######################################################################
# Base log parser class
class QPULogParser:
    """Implements the basic JSON log parser interface.

    Attributes:
        files(list): list of processed files (filenames),
        df(pd.DataFrame): the accumulated data.
    """
    def __init__(self, files=None, log=True,
                 orig_dir="./instances/orig",
                 qubo_dir="./instances/QUBO"):
        self.files = files
        self.log = log
        self.inst_stats = dict()
        self.logtype = "UNDEFINED"
        self.orig_dir = orig_dir
        self.qubo_dir = qubo_dir

    @abstractmethod
    def _extract_successful_line(self, js, filename):
        """Extracts a single successful instance run (from a single JSON)."""
        raise NotImplementedError

    @abstractmethod
    def _extract_failed_line(self, js, filename):
        """Extracts a single failed instance run (from a single JSON)."""
        raise NotImplementedError

    def save(self, outfile, statsfile=None):
        """Helper: saves the statistics on the processed files.

        This generates ``*_stats.csv`` type of files, which summarize the
        number of successful and failed runs per instance.
        """
        self.df.to_csv(outfile, index=False)
        if self.log:
            print(f"Saved {len(self.df)} entries to {outfile}.")

        if statsfile is not None:
            stats = pd.DataFrame({
                "logtype": [self.logtype for _ in self.inst_stats],
                "instance_id": [inst_id for inst_id in self.inst_stats],
                "instance_type": [get_inst_type(inst_id)
                                  for inst_id in self.inst_stats],
                "qubo_vars": [get_instance_size_by_ID(inst_id)
                              for inst_id in self.inst_stats],
                "success_runs": [stats[0]
                                 for (_, stats) in self.inst_stats.items()],
                "failed_runs": [stats[1]
                                for (_, stats) in self.inst_stats.items()],
            })
            stats.to_csv(statsfile, index=False)
            if self.log:
                print(f"Saved {len(stats)} unique instances stats to {statsfile}.")

    def process_files(self, filenames=None, outfile=None, statsfile=None):
        """Processes the list of JSON raw log files (in ``filenames``).

        A universal high-level procedure, which relies on the
        `_extract_successful_line` and `_extract_failed_line` methods
        implemented for each respective log file type (device type).
        """
        if filenames is None:
            filenames = self.files

        failed_instances = []
        notfound_instances = []
        for filename in filenames:
            if self.log:
                print(f"Adding: {filename}", flush=True)
            with open(filename, 'r') as infile:
                js = json.load(infile)

            if 'instance_id' in js:
                inst_id = js['instance_id']
            elif 'problem' in js:
                # this is a QuEra logfile:
                # it has a slightly different format ( :-/ )
                inst_name = js['problem']["instance_name"]
                with open(self.orig_dir + "/" + inst_name + ".json", 'r') as ofile:
                    origjs = json.load(ofile)
                inst_id = origjs['description']['instance_id']
            else:
                raise ValueError(f"{filename}: instance ID not found.")

            if not instance_present_in_folder(inst_id):
                notfound_instances += [inst_id]
                print(f"{inst_id} (file {filename}): not found in 'instances/', skipping.")
                continue

            if inst_id not in self.inst_stats:
                self.inst_stats[inst_id] = [0, 0]

            if 'success' in js:
                if js['success']:
                    self.df.loc[len(self.df)] = self._extract_successful_line(js, filename)
                    self.inst_stats[inst_id][0] += 1
                else:
                    self.df.loc[len(self.df)] = self._extract_failed_line(js, filename)
                    failed_instances.append(filename)
                    self.inst_stats[inst_id][1] += 1
            else:
                # this is a QuEra log file (hopefully)
                if ('qpu_result' in js) and (len(js['qpu_result']['qpu_counts']) > 0):
                    self.df.loc[len(self.df)] = self._extract_successful_line(js, filename)
                    self.inst_stats[inst_id][0] += 1
                else:
                    self.df.loc[len(self.df)] = self._extract_failed_line(js, filename)
                    failed_instances.append(filename)
                    self.inst_stats[inst_id][1] += 1

        if self.log:
            print(f"✅ Processed {len(filenames)} json files.")
            if len(failed_instances) > 0:
                print(f"❌ Including the following {len(failed_instances)} failed runs:\n"
                      + "\n".join(failed_instances))
            else:
                print("(No failed instances in the list.)")

            if len(notfound_instances) > 0:
                print(f"❌ Skipped the following {len(notfound_instances)} instances (no data found in 'instances/'):\n"
                      + "\n".join(notfound_instances))
            else:
                print("(All instances in the list were found in 'instances/'.)")

        if outfile is not None:
            self.save(outfile, statsfile)

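
# A minimal sketch of the two log-record shapes that ``process_files``
# dispatches on, as inferred from the parsing code above. Both stubs are
# made up; real logs carry many more fields.

def _demo_minimal_log_stubs():
    """Sketch: D-Wave/IBM logs carry 'instance_id' and 'success' at the
    top level, while QuEra logs carry 'problem' -> 'instance_name' and
    'qpu_result' -> 'qpu_counts' instead."""
    dwave_or_ibm_style = {"instance_id": "TSP...", "success": True}
    quera_style = {"problem": {"instance_name": "..."},
                   "qpu_result": {"qpu_counts": {"grrg": 17}}}
    return dwave_or_ibm_style, quera_style
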
######################################################################
# QPU-specific parsers
class DWaveLogParser(QPULogParser):
    """Implements the D-Wave specific log parsing code."""
    def __init__(self, files=None, log=True):
        super().__init__(files, log)
        self.logtype = "DWave"
        self.df = pd.DataFrame(columns=[
            "filename", "start_timestamp", "end_timestamp",
            "instance_id", "instance_type", "qubo_vars",
            "sol_time", "obj_from_QPU_sol", "success",
            "solver_name", "chip_id", "chip_type", "topology",
            "num_reads", "binary_vars", "emb_qubits", "embedding_time"])

    def _extract_successful_line(self, js, filename):
        """Extracts a summary for a single D-Wave log.

        This function extracts a "successful" summary line: the one
        corresponding to an experiment that yielded some solutions
        (feasible or not).
        """
        n_log_qubits = 0
        n_act_qubits = 0
        embedding = js['solver']['outcome']['emb_properties']['embedding']
        for qubit in embedding:
            n_log_qubits += 1
            n_act_qubits += len(embedding[qubit])

        samples = DWaveLogParser.extract_samples(js)
        assert samples is not None
        best_sols = samples["solution"]  # choose the best from *all* samples

        return [filename,
                js['timestamps']['start'],
                js['timestamps']['end'],
                js['instance_id'],
                get_inst_type(js['instance_id']),
                get_instance_size_by_ID(js['instance_id']),
                (js['timestamps']['end'] - js['timestamps']['start']),
                calculate_orig_objective(js["instance_id"], best_sols,
                                         orig_dir=self.orig_dir),
                js["success"],
                js['solver_name'],
                js["solver"]["outcome"]["emb_properties"]["child_properties"]["chip_id"],
                js["solver"]["outcome"]["emb_properties"]["child_properties"]["category"],
                js["solver"]["outcome"]["emb_properties"]["child_properties"]["topology"]["type"],
                js['solver']['options']['num_reads'],
                n_log_qubits,
                n_act_qubits,
                js['solver']['outcome']['emb_precalc_dt']]

    def _extract_failed_line(self, js, filename):
        """Extracts a summary for a single D-Wave log.

        This function extracts a "failed" summary line: the one
        corresponding to an experiment that yielded no solutions.
        (A separate function is needed as some fields might be absent
        from such a logfile, as compared to a "successful" one.)
        """
        return [filename,
                js['timestamps']['start'],
                js['timestamps']['end'],
                js['instance_id'],
                get_inst_type(js['instance_id']),
                get_instance_size_by_ID(js['instance_id']),
                (js['timestamps']['end'] - js['timestamps']['start']),
                pd.NA, False,
                js['solver_name'],
                pd.NA, pd.NA, pd.NA, pd.NA, pd.NA, pd.NA, pd.NA]

    @staticmethod
    def extract_samples(js, logfile=None,
                        qubo_dir="./instances/QUBO",
                        orig_dir="./instances/orig"):
        """Extracts the sample data from JSON (D-Wave).

        Args:
            js: loaded JSON,
            logfile: log file name (to save into the output),
            qubo_dir, orig_dir(str): respective JSON files directories
                (for objective calculations).

        Notes:
            - resulting solution bitstrings are in **reverse** order:
              e.g., ``[b3, b2, b1, b0]``.

        Returns:
            a ``pd.DataFrame`` with the samples, or ``None`` if the
            corresponding JSON files are not found in either of
            ``qubo_dir`` or ``orig_dir``.
        """
        sols = []
        energies = []
        true_objs = []  # values calculated as QUBO values (incl. penalties)
        num_occs = []
        chain_breaks = []
        feasibility = []
        orig_objs = []  # values calculated using the original instance-specific code

        inst_id = js["instance_id"]
        if not instance_present_in_folder(inst_id):
            print(f"{inst_id} ({logfile}): not found in '{qubo_dir}' or '{orig_dir}', skipping.")
            return None

        for sample in js["solver"]["outcome"]["samples"]:
            s = sample['sample']  # sample data (solution)
            assert [str(x) for x in s] == [str(j) for j in range(len(s))]
            # if sample['chain_break_fraction'] > 0:
            #     continue
            bitstring = "".join([str(s[str(x)])
                                 for x in range(len(s)-1, -1, -1)])
            sols.append(bitstring)
            energies.append(sample["energy"])
            num_occs.append(int(sample["num_occurrences"]))
            chain_breaks.append(float(sample["chain_break_fraction"]))
            obj, feasible = calculate_QUBO_objective(inst_id, [bitstring],
                                                     orig_dir=orig_dir,
                                                     qubo_dir=qubo_dir)
            true_objs.append(obj)
            feasibility.append(feasible)
            if feasible:
                orig_obj = calculate_orig_objective(inst_id, [bitstring],
                                                    orig_dir=orig_dir)
            else:
                orig_obj = None

            orig_objs.append(orig_obj)

        inst_ids = [inst_id for _ in range(len(sols))]
        inst_type = [get_inst_type(inst_id) for _ in range(len(sols))]
        inst_size = [get_instance_size_by_ID(inst_id, folder=qubo_dir)
                     for _ in range(len(sols))]
        logfiles = [logfile for _ in range(len(sols))]
        return pd.DataFrame(zip(logfiles, inst_ids, inst_type, inst_size,
                                sols, feasibility, energies, num_occs,
                                chain_breaks, true_objs, orig_objs),
                            columns=['logfile', 'inst_id', 'inst_type',
                                     'inst_size', 'solution', 'sol_feasible',
                                     'energy', 'no_occ', 'chain_break_frac',
                                     'objective', 'orig_obj'])

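
# A minimal programmatic usage sketch for the D-Wave parser, equivalent to
# the ``summarize_dwave`` command in the USAGE section above; the paths are
# hypothetical and must exist for the sketch to run.

def _demo_dwave_usage():
    """Sketch: parse all D-Wave logs in a directory and save the summary
    and per-instance statistics CSVs."""
    parser = DWaveLogParser(files=glob("./run_logs/dwave/*.json"))
    parser.process_files(outfile="./run_logs/summaries/dwave_summary.csv",
                         statsfile="./run_logs/summaries/dwave_stats.csv")
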
class IBMLogParser(QPULogParser):
    """Implements the IBM specific log parsing code."""
    def __init__(self, files=None, log=True, logtype="IBM"):
        super().__init__(files, log)
        self.logtype = logtype
        self.df = pd.DataFrame(columns=[
            "logfile", "start_timestamp", "end_timestamp",
            "instance_id", "instance_type", "qubo_vars",
            "success", "sol_time", "obj_from_QPU_sol",
            "classic_solver", "classic_solver_params",
            "backend_name", "sampler_shots", "est_shots"])

    def _extract_successful_line(self, js, filename):
        """Extracts a single instance run, parsing a single IBM log.

        This function extracts a "successful" line: the one corresponding
        to an experiment that yielded some solutions (feasible or not).
        """
        samples = IBMLogParser.extract_samples(js)
        if samples is not None:
            best_sols = samples["solution"]  # choose the best from *all* samples
            return [filename,
                    js['timestamps']['start'],
                    js['timestamps']['end'],
                    js['instance_id'],
                    get_inst_type(js['instance_id']),
                    get_instance_size_by_ID(js['instance_id']),
                    js["success"],
                    (js['timestamps']['end'] - js['timestamps']['start']),
                    calculate_orig_objective(js["instance_id"], best_sols,
                                             orig_dir=self.orig_dir),
                    js['args'][1],
                    js['solver']['options']['optimizer_kwargs'],
                    js['solver']['options']['backend_name'],
                    js['solver']['options']['sampler_shots'],
                    js['solver']['options']['estimator_shots']]
        else:
            # not quite a successful run:
            # this is the case when the sample contains no data for some
            # reason. Still, more information is available here,
            # as compared to the case processed by
            # :py:func:`IBMLogParser._extract_failed_line`
            return [filename,
                    js['timestamps']['start'],
                    js['timestamps']['end'],
                    js['instance_id'],
                    get_inst_type(js['instance_id']),
                    get_instance_size_by_ID(js['instance_id']),
                    "NO_SOLS_LOGGED",
                    (js['timestamps']['end'] - js['timestamps']['start']),
                    pd.NA,
                    js['args'][1],
                    js['solver']['options']['optimizer_kwargs'],
                    js['solver']['options']['backend_name'],
                    js['solver']['options']['sampler_shots'],
                    js['solver']['options']['estimator_shots']]

    def _extract_failed_line(self, js, filename):
        """Extracts a single instance run, parsing a single IBM log.

        This function extracts a "failed" summary line: the one
        corresponding to an experiment that yielded no solutions.
        (A separate function is needed as some fields might be absent
        from such a logfile, as compared to a "successful" one.)
        """
        return [filename,
                js['timestamps']['start'],
                js['timestamps']['end'],
                js['instance_id'],
                get_inst_type(js['instance_id']),
                get_instance_size_by_ID(js['instance_id']),
                False,
                (js['timestamps']['end'] - js['timestamps']['start']),
                pd.NA, pd.NA, pd.NA, pd.NA, pd.NA, pd.NA]

    @staticmethod
    def extract_samples(js, logfile=None,
                        qubo_dir="./instances/QUBO",
                        orig_dir="./instances/orig"):
        """Extracts the sample data from JSON (IBM).

        Args:
            js: loaded JSON,
            logfile(str): log file name (to save into the output),
            qubo_dir, orig_dir(str): respective JSON files directories
                (for objective calculations).

        Returns:
            a ``pd.DataFrame`` with the samples.
        """
        if not js["success"]:
            return None

        jobs = js["solver"]["outcome"]["session_data"]["job_data"]
        samples = dict()
        for jobname, job in jobs.items():
            if job["quasi_dists"] is not None:
                assert len(job["quasi_dists"]) == 1
                assert len(job["quasi_dists"][0]) == 1
                nshots = job["metadata"][0]["shots"]
                for sol, freq in job["quasi_dists"][0][0].items():
                    if sol not in samples:
                        samples[sol] = 0
                    samples[sol] += freq * nshots

        if len(samples) == 0:
            return None  # couldn't find a solution in the log

        sols = []
        no_occ = []
        true_objs = []
        orig_objs = []
        feasibility = []

        inst_id = js["instance_id"]
        N = get_instance_size_by_ID(inst_id, folder=qubo_dir)
        for sample in samples:
            bitstring = ("{:0" + str(N) + "b}").format(int(sample))
            sols.append(bitstring)
            no_occ.append(samples[sample])
            obj, feasible = calculate_QUBO_objective(inst_id, [bitstring],
                                                     orig_dir=orig_dir,
                                                     qubo_dir=qubo_dir)
            true_objs.append(obj)
            feasibility.append(feasible)
            if feasible:
                orig_obj = calculate_orig_objective(inst_id, [bitstring],
                                                    orig_dir=orig_dir)
            else:
                orig_obj = None

            orig_objs.append(orig_obj)

        IDs = [js["instance_id"] for _ in range(len(orig_objs))]
        return pd.DataFrame(zip(IDs, sols, feasibility, no_occ,
                                true_objs, orig_objs),
                            columns=['inst_id', 'solution', 'sol_feasible',
                                     'no_occ', 'objective', 'orig_obj'])

    @staticmethod
    def extract_convergence_data(js, logfile=None):
        """Extracts the outer loop convergence data from an IBM log.

        Args:
            js: loaded JSON,
            logfile: log file name (to save into the output).

        Returns:
            a ``pd.DataFrame`` with the convergence data (long format).
        """
        if not js['success']:
            return None

        history = js['solver']['outcome']['solver_history']
        K = len(history['params'])  # number of classic iterations
        convdf = pd.DataFrame(columns=["logfile", "instance_id",
                                       "iteration", "variable", "value"])
        for k in range(K):
            row = [logfile, js["instance_id"], k]
            convdf.loc[len(convdf)] = row + ["cost", history['cost'][k]]
            convdf.loc[len(convdf)] = row + ["timestamp",
                                             history['timestamp'][k]]
            params = history['params'][k]
            for i, param in enumerate([float(x)
                                       for x in params[1:-1].split()]):
                convdf.loc[len(convdf)] = row + [f"param{i}", param]

            if len(history['metadata'][k]) > 1:
                for mdvar in history['metadata'][k]:
                    convdf.loc[len(convdf)] = row + [mdvar, history['metadata'][k][mdvar]]

        return convdf

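
# Two small sketches of the IBM parser internals; the literal values below
# are made up for illustration.

def _demo_ibm_bitstring_padding():
    """Sketch: how a quasi-distribution key (an integer encoding of the
    measured state) is turned into a zero-padded bitstring of width ``N``
    in :py:meth:`IBMLogParser.extract_samples`. Here ``N=6`` and
    ``sample=5`` are hypothetical."""
    N = 6
    sample = 5
    return ("{:0" + str(N) + "b}").format(int(sample))  # -> "000101"


def _demo_ibm_param_parsing():
    """Sketch: IBM logs store the optimizer parameters of each iteration
    as a stringified array; extract_convergence_data recovers the floats
    as below. The string is a made-up example."""
    params = "[0.12 1.57 3.14]"
    return [float(x) for x in params[1:-1].split()]  # -> [0.12, 1.57, 3.14]
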
class QuEraLogParser(QPULogParser):
    """Implements the QuEra specific log parsing code."""
    def __init__(self, files=None, log=True):
        super().__init__(files, log)
        self.logtype = "QuEra"
        self.df = pd.DataFrame(columns=[
            "logfile", "start_timestamp", "end_timestamp",
            "instance_id", "instance_type", "qubo_vars",
            "R", "version", "success", "sol_time", "obj_from_QPU_sol"])

    def _extract_successful_line(self, js, filename):
        """Extracts a single instance run, parsing a single QuEra log."""
        samples = QuEraLogParser.extract_samples(js)
        inst_name = js['problem']["instance_name"]
        with open(self.orig_dir + "/" + inst_name + ".json", 'r') as ofile:
            origjs = json.load(ofile)

        inst_id = origjs['description']['instance_id']
        if samples is not None:
            best_sols = samples["solution"]  # choose the best from *all* samples
            return [filename,
                    js['meta']['timestamp_start'],
                    js['meta']['timestamp_end'],
                    inst_id,
                    get_inst_type(inst_id),
                    get_instance_size_by_ID(inst_id),
                    js['settings']['R'],
                    js['meta']['version'],
                    True,
                    (js['meta']['timestamp_end'] - js['meta']['timestamp_start']),
                    calculate_orig_objective(inst_id, best_sols,
                                             orig_dir=self.orig_dir)]
        else:
            # no samples recorded (not really a successful run)
            return [filename,
                    js['meta']['timestamp_start'],
                    js['meta']['timestamp_end'],
                    inst_id,
                    get_inst_type(inst_id),
                    get_instance_size_by_ID(inst_id),
                    js['settings']['R'],
                    js['meta']['version'],
                    "NO_SOLS_LOGGED",
                    (js['meta']['timestamp_end'] - js['meta']['timestamp_start']),
                    pd.NA]

    def _extract_failed_line(self, js, filename):
        """This function is not implemented; we never received such a result."""
        # We expect this to never happen for QuEra
        raise NotImplementedError("QuEra: found an unsuccessful item / something went wrong before.")

    @staticmethod
    def extract_samples(js, logfile=None,
                        qubo_dir="./instances/QUBO/",
                        orig_dir="./instances/orig/"):
        """Extracts the sample data from JSON (QuEra logfile).

        Args:
            js: loaded JSON,
            logfile: log file name (to save into the output),
            qubo_dir, orig_dir(str): respective JSON files directories
                (for objective calculations).

        Note:
            The method assumes (UD)MIS instances only!

        Returns:
            a ``pd.DataFrame`` with the samples.
        """
        sols = []
        counts = []
        feasibility = []
        objs = []
        orig_objs = []
        inst_ids = []

        inst_name = js['problem']["instance_name"]
        with open(orig_dir + "/" + inst_name + ".json", 'r') as ofile:
            origjs = json.load(ofile)

        inst_id = origjs['description']['instance_id']
        Q, P, C, qubojs = load_QUBO(get_QUBO_by_ID(inst_id))
        G = extract_G_from_json(origjs)

        for sample in js['qpu_result']['qpu_counts']:
            bs = {'g': '0', 'r': '1'}  # ground / Rydberg state encoding
            bitstring = [bs[s] for s in reversed(sample)]
            QUBO_obj, fflag = calculate_QUBO_objective(inst_id, [bitstring])
            inst_ids.append(inst_id)
            objs.append(QUBO_obj)
            feasibility.append(fflag)
            sols.append(bitstring)
            counts.append(js['qpu_result']['qpu_counts'][sample])
            orig_objs.append(calculate_orig_objective(inst_id, [bitstring]))

        return pd.DataFrame(zip(inst_ids, sols, feasibility, counts,
                                objs, orig_objs),
                            columns=['inst_id', 'solution', 'sol_feasible',
                                     'no_occ', 'objective', 'orig_obj'])

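
# A minimal sketch of the readout encoding used just above; the sample
# string is made up.

def _demo_quera_bit_mapping():
    """Sketch: how a QuEra per-atom readout string ('g' = ground,
    'r' = Rydberg) is mapped to a reversed 0/1 list in
    :py:meth:`QuEraLogParser.extract_samples`."""
    bs = {'g': '0', 'r': '1'}
    sample = "grrg"  # hypothetical measurement of four atoms
    return [bs[s] for s in reversed(sample)]  # -> ['0', '1', '1', '0']
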
######################################################################
# Script commands (to be run from the command line, see USAGE)
def summarize_dwave(directory, outfile, statsfile):
    """Processes all the D-Wave logs in the given directory."""
    parser = DWaveLogParser(files=glob(directory + "/*.json"))
    parser.process_files(outfile=outfile, statsfile=statsfile)

def summarize_ibm(directory, outfile, statsfile, logtype="IBM"):
    """Processes all the IBM logs in the given directory."""
    parser = IBMLogParser(files=glob(directory + "/*.json"), logtype=logtype)
    parser.process_files(outfile=outfile, statsfile=statsfile)

def summarize_quera(directory, outfile, statsfile):
    """Processes all the QuEra logs in the given directory."""
    parser = QuEraLogParser(files=glob(directory + "/*.json"))
    parser.process_files(outfile=outfile, statsfile=statsfile)

def extract_samples(logtype, infile, outfile,
                    qubo_dir="./instances/QUBO",
                    orig_dir="./instances/orig"):
    """Extracts the samples from a single JSON log."""
    with open(infile, 'r') as infilehandle:
        js = json.load(infilehandle)

    ParserClass = {"dwave": DWaveLogParser,
                   "ibm": IBMLogParser,
                   "quera": QuEraLogParser}
    parser = ParserClass[logtype.lower()](files=[infile])
    samples = parser.extract_samples(js, logfile=str(infile),
                                     qubo_dir=qubo_dir, orig_dir=orig_dir)
    if samples is None:
        print(f"{infile}: unsuccessful run, skipping.")
        exit(1)
    else:
        samples.to_csv(outfile)

def extract_all_samples(logtype, jsondir, outdir):
    """Extracts the samples from all the JSONs in `jsondir`."""
    for infile in glob(jsondir + "/benchmark_*.json"):
        print(f"Processing {infile}...", end="", flush=True)
        with open(infile, 'r') as infilehandle:
            js = json.load(infilehandle)

        if not js["success"]:
            print(f"skipped, success={js['success']}", flush=True)
            continue

        inst_id = js['instance_id']
        if not instance_present_in_folder(inst_id):
            print(f"{inst_id} (file {infile}): not found in 'instances/', skipping.")
            continue

        outfile = outdir + "/" + os.path.basename(os.path.normpath(infile)) + \
            '.sample.csv'
        ParserClass = {"dwave": DWaveLogParser,
                       "ibm": IBMLogParser,
                       "quera": QuEraLogParser}
        parser = ParserClass[logtype.lower()](files=[infile])
        samples = parser.extract_samples(js, logfile=str(infile))
        samples.to_csv(outfile)
        print("done", flush=True)

if __name__ == '__main__':
    # treats command line arguments as function name and further args
    args = argv
    globals()[args[1]](*args[2:])