Precomputed Analysis

Use precomputed optimal angles to measure \(\langle C \rangle\), the expected value of the cost function, across a variety of problem types, problem sizes, \(p\)-depths, and random instances.
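For reference (and consistent with the min_c and max_c columns loaded below), the cost operator for a weighted problem graph \(G = (V, E)\) is

\[ C = \sum_{\langle i, j \rangle \in E} w_{ij} Z_i Z_j , \]

and results are reported as the ratio \(\langle C \rangle / C_\mathrm{min}\), which is 1 for the optimal solution and 0 for a uniformly random state.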

Load the Raw Data

Iterate through each record, load its supporting objects, flatten everything into flat records, and collect them into one large dataframe.
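The recirq.flatten_dataclass_into_record calls below expand a dataclass-valued entry of a record into one scalar column per field, conceptually like this sketch (a hypothetical helper, not recirq's exact implementation, which also leaves field names unprefixed when they don't collide — hence both p and optimum.p in the columns below):

import dataclasses

def flatten_into(record: dict, key: str) -> None:
    """Sketch: replace record[key] (a dataclass) with flat 'key.field' entries."""
    obj = record.pop(key)
    for field in dataclasses.fields(obj):
        record[f'{key}.{field.name}'] = getattr(obj, field.name)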

[1]:
import recirq
import cirq
import numpy as np
import pandas as pd
from recirq.qaoa.experiments.precomputed_execution_tasks import \
    DEFAULT_BASE_DIR, DEFAULT_PROBLEM_GENERATION_BASE_DIR, DEFAULT_PRECOMPUTATION_BASE_DIR

records = []
for record in recirq.iterload_records(dataset_id="2020-03-tutorial", base_dir=DEFAULT_BASE_DIR):
    # Walk the task hierarchy: data collection -> angle precomputation -> problem generation
    dc_task = record['task']
    apre_task = dc_task.precomputation_task
    pgen_task = apre_task.generation_task

    problem = recirq.load(pgen_task, base_dir=DEFAULT_PROBLEM_GENERATION_BASE_DIR)['problem']
    record['problem'] = problem.graph
    record['problem_type'] = problem.__class__.__name__
    record['optimum'] = recirq.load(apre_task, base_dir=DEFAULT_PRECOMPUTATION_BASE_DIR)['optimum']
    record['bitstrings'] = record['bitstrings'].bits  # unwrap recirq's BitArray wrapper into a raw array
    recirq.flatten_dataclass_into_record(record, 'task')
    recirq.flatten_dataclass_into_record(record, 'precomputation_task')
    recirq.flatten_dataclass_into_record(record, 'generation_task')
    recirq.flatten_dataclass_into_record(record, 'optimum')
    records.append(record)
df_raw = pd.DataFrame(records)
df_raw['timestamp'] = pd.to_datetime(df_raw['timestamp'])
df_raw.head()
[1]:
[output: df_raw.head() — 5 rows × 25 columns: timestamp, bitstrings, qubits, final_qubits, circuit, violation_indices, problem, problem_type, dataset_id, device_name, …, generation_task.dataset_id, generation_task.device_name, instance_i, n_qubits, optimum.p, f_val, gammas, betas, min_c, max_c]

Narrow Down to Relevant Data

Drop unnecessary metadata and use the measured bitstrings to compute the expected value of the energy. In general, it’s better to save the raw data along with plentiful metadata so it’s available if it becomes relevant later.
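The estimate itself comes from hamiltonian_objective_avg_and_err, which conceptually reduces to averaging the cost over measured bitstrings, roughly as in this sketch (a hypothetical re-implementation assuming the spin convention \(z_i = 1 - 2 b_i\), graph nodes numbered 0..n-1 to index bit positions, and 'weight' edge attributes; the permutation argument used below, which reorders measured bits to match graph nodes, is omitted here):

import numpy as np
import networkx as nx

def estimate_energy_sketch(bitstrings, graph: nx.Graph):
    """Mean and standard error of C = sum_ij w_ij z_i z_j over repeated shots."""
    spins = 1 - 2 * np.asarray(bitstrings)  # bits {0, 1} -> spins {+1, -1}
    energies = np.zeros(len(spins))
    for i, j, w in graph.edges.data('weight', default=1):
        energies += w * spins[:, i] * spins[:, j]  # per-shot contribution of edge (i, j)
    return energies.mean(), energies.std(ddof=1) / np.sqrt(len(energies))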

[2]:
from recirq.qaoa.simulation import hamiltonian_objective_avg_and_err

def compute_energy_w_err(row):
    # Measured bit order can differ from the problem graph's node order;
    # build the permutation taking each problem qubit to its measured position.
    permutation = []
    for i, q in enumerate(row['qubits']):
        fi = row['final_qubits'].index(q)
        permutation.append(fi)

    energy, err = hamiltonian_objective_avg_and_err(row['bitstrings'], row['problem'], permutation)
    return pd.Series([energy, err], index=['energy', 'err'])


# Start cleaning up the raw data
df = df_raw.copy()

# Don't need these columns for present analysis
df = df.drop(['gammas', 'betas', 'circuit', 'violation_indices',
              'precomputation_task.dataset_id',
              'generation_task.dataset_id',
              'generation_task.device_name'], axis=1)

# p is specified twice (from a parameter and from optimum)
assert (df['optimum.p'] == df['p']).all()
df = df.drop('optimum.p', axis=1)

# Compute energies
df = df.join(df.apply(compute_energy_w_err, axis=1))
df = df.drop(['bitstrings', 'qubits', 'final_qubits', 'problem'], axis=1)

# Normalize by the optimal cost C_min (which is negative), so an
# energy_ratio of 1 is optimal and 0 matches a uniformly random state
df['energy_ratio'] = df['energy'] / df['min_c']
df['err_ratio'] = df['err'] * np.abs(1 / df['min_c'])
df['f_val_ratio'] = df['f_val'] / df['min_c']

df
[2]:
[output: the cleaned dataframe — 150 rows × 18 columns: timestamp, problem_type, dataset_id, device_name, n_shots, structured, echoed, p, instance_i, n_qubits, f_val, min_c, max_c, energy, err, energy_ratio, err_ratio, f_val_ratio]

Plots

[3]:
%matplotlib inline
from matplotlib import pyplot as plt

import seaborn as sns
sns.set_style('ticks')

plt.rc('axes', labelsize=16, titlesize=16)
plt.rc('xtick', labelsize=14)
plt.rc('ytick', labelsize=14)
plt.rc('legend', fontsize=14, title_fontsize=16)

# theme colors
QBLUE = '#1967d2'
QRED = '#ea4335ff'
QGOLD = '#fbbc05ff'
QGREEN = '#34a853ff'

QGOLD2 = '#ffca28'
QBLUE2 = '#1e88e5'
[4]:
C = r'\langle C \rangle'
CMIN = r'C_\mathrm{min}'
COVERCMIN = f'${C}/{CMIN}$'
[5]:
def percentile(n):
    """Return a named percentile aggregator for use with pandas .agg."""
    def percentile_(x):
        return np.nanpercentile(x, n)
    percentile_.__name__ = f'percentile_{n}'
    return percentile_
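The closure carries a __name__ so that pandas .agg can label the resulting columns; illustrative usage:

# e.g. quartiles of the measured energy ratio at each depth p
df.groupby('p')['energy_ratio'].agg(['median', percentile(25), percentile(75)])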

Raw swarm plots of all data

[6]:

pretty_problem = {
    'HardwareGridProblem': 'Hardware Grid',
    'SKProblem': 'SK Model',
    'ThreeRegularProblem': '3-Regular MaxCut'
}

for problem_type in ['HardwareGridProblem', 'SKProblem', 'ThreeRegularProblem']:
    df1 = df
    df1 = df1[df1['problem_type'] == problem_type]

    for p in sorted(df1['p'].unique()):
        dfb = df1
        dfb = dfb[dfb['p'] == p]
        dfb = dfb.sort_values(by='n_qubits')

        plt.subplots(figsize=(7,5))

        n_instances = dfb.groupby('n_qubits').count()['energy_ratio'].unique()
        if len(n_instances) == 1:
            n_instances = n_instances[0]
            label = f'{n_instances}'
        else:
            label = f'{min(n_instances)} - {max(n_instances)}'

        #sns.boxplot(x=dfb['n_qubits'], y=dfb['energy_ratio'], color=QBLUE, saturation=1)
        #sns.boxplot(x=dfb['n_qubits'], y=dfb['f_val_ratio'], color=QGREEN, saturation=1)
        sns.swarmplot(x=dfb['n_qubits'], y=dfb['energy_ratio'], color=QBLUE)
        sns.swarmplot(x=dfb['n_qubits'], y=dfb['f_val_ratio'], color=QGREEN)

        plt.axhline(1, color='grey', ls='-')
        plt.axhline(0, color='grey', ls='-')

        plt.title(f'{pretty_problem[problem_type]}, {label} instances, p={p}')
        plt.xlabel('# Qubits')
        plt.ylabel(COVERCMIN)
        plt.tight_layout()
        plt.show()

[figures: nine swarm plots, one per (problem type, p = 1, 2, 3) combination — \(\langle C \rangle / C_\mathrm{min}\) vs. # qubits, experiment (blue) and noiseless (green)]

Compare SK and Hardware Grid vs. \(n\)

[7]:

df1 = df
df1 = df1[
    ((df1['problem_type'] == 'SKProblem') & (df1['p'] == 3))
    | ((df1['problem_type'] == 'HardwareGridProblem') & (df1['p'] == 3))
    ]
df1 = df1.sort_values(by='n_qubits')

MINQ = 3
df1 = df1[df1['n_qubits'] >= MINQ]

plt.subplots(figsize=(8, 6))
plt.xlim((8, 23))

# SK model (red)
dfb = df1
dfb = dfb[dfb['problem_type'] == 'SKProblem']
sns.swarmplot(x=dfb['n_qubits'], y=dfb['energy_ratio'], s=5, linewidth=0.5, edgecolor='k', color=QRED)
sns.swarmplot(x=dfb['n_qubits'], y=dfb['f_val_ratio'], s=5, linewidth=0.5, edgecolor='k', color=QRED,
              marker='s')
# --------


# Hardware grid (blue)
dfb = df1
dfb = dfb[dfb['problem_type'] == 'HardwareGridProblem']
sns.swarmplot(x=dfb['n_qubits'], y=dfb['energy_ratio'], s=5, linewidth=0.5, edgecolor='k', color=QBLUE)
sns.swarmplot(x=dfb['n_qubits'], y=dfb['f_val_ratio'], s=5, linewidth=0.5, edgecolor='k', color=QBLUE,
              marker='s')
# -------


plt.axhline(1, color='grey', ls='-')
plt.axhline(0, color='grey', ls='-')

plt.xlabel('# Qubits')
plt.ylabel(COVERCMIN)

from matplotlib.lines import Line2D
from matplotlib.legend_handler import HandlerTuple

# Proxy artists for the legend: color encodes the problem family,
# marker shape distinguishes noiseless (square) from experiment (circle).
lelements = [
    Line2D([0], [0], color=QBLUE, marker='o', ms=7, ls=''),
    Line2D([0], [0], color=QRED, marker='o', ms=7, ls=''),

    Line2D([0], [0], color='k', marker='s', ms=7, ls='', markerfacecolor='none'),
    Line2D([0], [0], color='k', marker='o', ms=7, ls='', markerfacecolor='none'),
]

plt.legend(lelements, ['Hardware Grid', 'SK Model', 'Noiseless', 'Experiment'], loc='best',
           title='p = 3',
           handler_map={tuple: HandlerTuple(ndivide=None)}, framealpha=1.0)
plt.tight_layout()
plt.show()
[figure: \(\langle C \rangle / C_\mathrm{min}\) vs. # qubits at p = 3 — hardware grid (blue) vs. SK model (red); squares noiseless, circles experiment]

Hardware Grid vs. \(p\)

[8]:
dfb = df
dfb = dfb[dfb['problem_type'] == 'HardwareGridProblem']
dfb = dfb[['p', 'instance_i', 'n_qubits', 'energy_ratio', 'f_val_ratio']]
P_LIMIT = dfb['p'].max()

def max_over_p(group):
    """For one (n_qubits, instance) group, return the best energy ratio and the p that achieved it."""
    i = group['energy_ratio'].idxmax()
    return group.loc[i][['energy_ratio', 'p']]

def count_p(group):
    """Count how many instances in the group were won by each depth p = 1..P_LIMIT."""
    new = {}
    for i, c in enumerate(np.bincount(group['p'], minlength=P_LIMIT + 1)):
        if i == 0:
            continue
        new[f'p{i}'] = c
    return pd.Series(new)


# For each problem instance, find which p gave the best measured energy,
# then tally the winners into a "fraction best" distribution over p.
dfgy = dfb.groupby(['n_qubits', 'instance_i']).apply(max_over_p).reset_index()
dfgz = dfgy.groupby(['n_qubits']).apply(count_p).reset_index()
# In the paper, we restrict to n > 10
# dfgz = dfgz[dfgz['n_qubits'] > 10]
dfgz = dfgz.set_index('n_qubits').sum(axis=0)
dfgz /= (dfgz.sum())
dfgz
[8]:
p1    0.25
p2    0.60
p3    0.15
dtype: float64

Read: p = 2 gives the best measured energy ratio for 60% of these hardware-grid instances, while p = 3 wins only 15% of the time, consistent with noise eroding the benefit of deeper circuits.
[10]:
dfb = df
dfb = dfb[dfb['problem_type'] == 'HardwareGridProblem']
dfb = dfb[['p', 'instance_i', 'n_qubits', 'energy_ratio', 'f_val_ratio']]
# In the paper, we restrict to n > 10
# dfb = dfb[dfb['n_qubits'] > 10]
dfg = dfb.groupby('p').agg(['median', percentile(25), percentile(75), 'mean', 'std']).reset_index()

plt.subplots(figsize=(5.5,4))
plt.errorbar(x=dfg['p'], y=dfg['f_val_ratio', 'mean'],
             yerr=(dfg['f_val_ratio', 'std'],
                   dfg['f_val_ratio', 'std']),
             fmt='o-',
             capsize=7,
             color=QGREEN,
             label='Noiseless'
           )
plt.errorbar(x=dfg['p'], y=dfg['energy_ratio', 'mean'],
             yerr=(dfg['energy_ratio', 'std'],
                   dfg['energy_ratio', 'std']),
             fmt='o-',
             capsize=7,
             color=QBLUE,
             label='Experiment'
           )
plt.xlabel('p')
plt.ylabel('Mean ' + COVERCMIN)
plt.ylim((0, 1))
plt.text(0.05, 0.9, r'Hardware Grid', fontsize=16, transform=plt.gca().transAxes, ha='left', va='bottom')
plt.legend(loc='center right')

ax2 = plt.gca().twinx()  # second y-axis, sharing x, for the "fraction best" bars

dfgz_p = [int(s[1:]) for s in dfgz.index]  # 'p1' -> 1, 'p2' -> 2, ...
dfgz_y = dfgz.values
ax2.bar(dfgz_p, dfgz_y, color=QBLUE, width=0.9, lw=1, ec='k')
ax2.tick_params(axis='y')
ax2.set_ylim((0, 2))  # squash the bars into the bottom quarter of the axes
ax2.set_yticks([0, 0.25, 0.50])
ax2.set_yticklabels(['0%', None, '50%'])
ax2.set_ylabel('Fraction best' + ' ' * 41, fontsize=14)  # trailing spaces shift the label down beside the bars

plt.tight_layout()
[figure: mean \(\langle C \rangle / C_\mathrm{min}\) vs. p for hardware-grid problems — noiseless (green) and experiment (blue), with "fraction best" bars on the secondary axis]