Skip to content

Commit

Permalink
test v0.1.0.1
Browse files (browse the repository at this point in the history)
  • Loading branch information
andyjslee committed Aug 7, 2023
1 parent 1de729b commit ef70f09
Show file tree
Hide file tree
Showing 14 changed files with 225 additions and 142 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,7 +5,7 @@ build/
__pycache__/
src/ace.egg-info/
notebooks/*.pt
examples/*.xlsx
examples/outputs/*.xlsx
*.pyc
.coverage*
*.DS_Store
Expand Down
45 changes: 36 additions & 9 deletions examples/run_ace_deconvolve.sh
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,47 @@
echo "Deconvolution Example 1: Empirical (Pool ID)."
ace deconvolve \
--readout-file-type pool_id \
--readout-files 25peptides_5perpool_3x_pool-id_readout.xlsx \
--assignment-excel-file 25peptides_5perpool_3x_seqsim_sat-solver.xlsx \
--readout-files ../test/data/25peptides_5perpool_3x_pool-id_readout.xlsx \
--assignment-excel-file ../test/data/25peptides_5perpool_3x_noseqsim_sat-solver.xlsx \
--min-spot-count 300 \
--output-excel-file 25peptides_5perpool_3x_pool-id_readout_deconvolved_empirical.xlsx
--output-excel-file outputs/25peptides_5perpool_3x_pool-id_readout_deconvolved_empirical.xlsx

echo "Deconvolution Example 2: Expectation Maximization (Pool ID)."
ace deconvolve \
--readout-file-type pool_id \
--readout-files 25peptides_5perpool_3x_pool-id_readout.xlsx \
--assignment-excel-file 25peptides_5perpool_3x_seqsim_sat-solver.xlsx \
--readout-files ../test/data/25peptides_5perpool_3x_pool-id_readout.xlsx \
--assignment-excel-file ../test/data/25peptides_5perpool_3x_noseqsim_sat-solver.xlsx \
--mode em \
--output-excel-file 25peptides_5perpool_3x_pool-id_readout_deconvolved_em.xlsx
--output-excel-file outputs/25peptides_5perpool_3x_pool-id_readout_deconvolved_em.xlsx

echo "Deconvolution Example 3: LASSO (Pool ID)."
ace deconvolve \
--readout-file-type pool_id \
--readout-files 25peptides_5perpool_3x_pool-id_readout.xlsx \
--assignment-excel-file 25peptides_5perpool_3x_seqsim_sat-solver.xlsx \
--readout-files ../test/data/25peptides_5perpool_3x_pool-id_readout.xlsx \
--assignment-excel-file ../test/data/25peptides_5perpool_3x_noseqsim_sat-solver.xlsx \
--mode lasso \
--output-excel-file 25peptides_5perpool_3x_pool-id_readout_deconvolved_lasso.xlsx
--output-excel-file outputs/25peptides_5perpool_3x_pool-id_readout_deconvolved_lasso.xlsx

echo "Deconvolution Example 4: Empirical (AID Plate Reader)."
ace deconvolve \
--readout-file-type aid_plate_reader \
--readout-files ../test/data/25peptides_5perpool_3x_aid-plate-reader_readout.xlsx \
--assignment-excel-file ../test/data/25peptides_5perpool_3x_noseqsim_sat-solver.xlsx \
--min-spot-count 300 \
--output-excel-file outputs/25peptides_5perpool_3x_aid-plate-reader_readout_deconvolved_empirical.xlsx

echo "Deconvolution Example 5: Expectation Maximization (AID Plate Reader)."
ace deconvolve \
--readout-file-type aid_plate_reader \
--readout-files ../test/data/25peptides_5perpool_3x_aid-plate-reader_readout.xlsx \
--assignment-excel-file ../test/data/25peptides_5perpool_3x_noseqsim_sat-solver.xlsx \
--mode em \
--output-excel-file outputs/25peptides_5perpool_3x_aid-plate-reader_readout_deconvolved_em.xlsx

echo "Deconvolution Example 6: LASSO (AID Plate Reader)."
ace deconvolve \
--readout-file-type aid_plate_reader \
--readout-files ../test/data/25peptides_5perpool_3x_aid-plate-reader_readout.xlsx \
--assignment-excel-file ../test/data/25peptides_5perpool_3x_noseqsim_sat-solver.xlsx \
--mode lasso \
--output-excel-file outputs/25peptides_5perpool_3x_aid-plate-reader_readout_deconvolved_lasso.xlsx
150 changes: 74 additions & 76 deletions examples/run_ace_generate.sh
Original file line number | Diff line number | Diff line change
@@ -1,158 +1,156 @@
echo "Example 1. Golfy without peptide sequences (120/12/3x)"
# Golfy
echo "Golfy Example 1: 90 Peptides, 9 Peptides per Pool, 3x Coverage."
ace generate \
--num-peptides 120 \
--num-peptides-per-pool 12 \
--num-peptides 90 \
--num-peptides-per-pool 9 \
--num-coverage 3 \
--mode golfy \
--output-excel-file 120peptides_12perpool_3x_noseqsim_golfy.xlsx \
--output-excel-file outputs/90peptides_9perpool_3x_noseqsim_golfy.xlsx \
--assign-well-ids 1 \
--num-plate-wells 96
echo ""
--num-plate-wells 96 \

echo "Example 2. SAT solver without peptide sequences (120/12/3x)"
echo "Golfy Example 2: 100 Peptides, 5 Peptides per Pool, 3x Coverage."
ace generate \
--num-peptides 120 \
--num-peptides-per-pool 12 \
--num-peptides 100 \
--num-peptides-per-pool 5 \
--num-coverage 3 \
--cpsat-solver-num-processes 6 \
--mode cpsat_solver \
--output-excel-file 120peptides_12perpool_3x_noseqsim_sat-solver.xlsx \
--mode golfy \
--output-excel-file outputs/100peptides_5perpool_3x_noseqsim_golfy.xlsx \
--assign-well-ids 1 \
--num-plate-wells 96
echo ""
--num-plate-wells 96 \

echo "Example 3. Golfy solver without peptide sequences (100/5/3x)"
echo "Golfy Example 3: 120 Peptides, 8 Peptides per Pool, 3x Coverage."
ace generate \
--num-peptides 100 \
--num-peptides-per-pool 5 \
--num-peptides 120 \
--num-peptides-per-pool 8 \
--num-coverage 3 \
--mode golfy \
--output-excel-file 100peptides_5perpool_3x_noseqsim_golfy.xlsx \
--output-excel-file outputs/120peptides_8perpool_3x_noseqsim_golfy.xlsx \
--assign-well-ids 1 \
--num-plate-wells 96
echo ""

echo "Example 4. SAT solver without peptide sequences (100/5/3x)"
echo "Golfy Example 4: 120 Peptides, 12 Peptides per Pool, 3x Coverage."
ace generate \
--num-peptides 100 \
--num-peptides-per-pool 5 \
--num-peptides 120 \
--num-peptides-per-pool 12 \
--num-coverage 3 \
--cpsat-solver-num-processes 6 \
--mode cpsat_solver \
--output-excel-file 100peptides_5perpool_3x_noseqsim_sat-solver.xlsx \
--mode golfy \
--output-excel-file outputs/120peptides_12perpool_3x_noseqsim_golfy.xlsx \
--assign-well-ids 1 \
--num-plate-wells 96
echo ""

echo "Example 5. Golfy without peptide sequences (120/8/3x)"
echo "Golfy Example 5: 220 Peptides, 11 Peptides per Pool, 3x Coverage."
ace generate \
--num-peptides 120 \
--num-peptides-per-pool 8 \
--num-peptides 220 \
--num-peptides-per-pool 11 \
--num-coverage 3 \
--mode golfy \
--output-excel-file 120peptides_8perpool_3x_noseqsim_golfy.xlsx \
--output-excel-file outputs/220peptides_11perpool_3x_noseqsim_golfy.xlsx \
--assign-well-ids 1 \
--num-plate-wells 96
echo ""

echo "Example 6. SAT solver without peptide sequences (120/8/3x)"
echo "Golfy Example 6: 240 Peptides, 12 Peptides per Pool, 3x Coverage."
ace generate \
--num-peptides 120 \
--num-peptides-per-pool 8 \
--num-peptides 240 \
--num-peptides-per-pool 12 \
--num-coverage 3 \
--cpsat-solver-num-processes 6 \
--cpsat-solver-max-peptides-per-block 64 \
--mode cpsat_solver \
--output-excel-file 120peptides_8perpool_3x_noseqsim_sat-solver.xlsx \
--mode golfy \
--output-excel-file outputs/240peptides_12perpool_3x_noseqsim_golfy.xlsx \
--assign-well-ids 1 \
--num-plate-wells 96
echo ""

echo "Example 7. Golfy with peptide sequences (25/5/3x)"
echo "Golfy Example 7: 400 Peptides, 5 Peptides per Pool, 3x Coverage."
ace generate \
--peptides-excel-file ../test/data/25peptide_sequences.xlsx \
--num-peptides 400 \
--num-peptides-per-pool 5 \
--num-coverage 3 \
--mode golfy \
--output-excel-file 25peptides_5perpool_3x_seqsim_golfy.xlsx
echo ""
--output-excel-file outputs/240peptides_12perpool_3x_noseqsim_golfy_384plates.xlsx \
--assign-well-ids 1 \
--num-plate-wells 384

echo "Example 8. SAT solver with peptide sequences (25/5/3x)"
echo "Golfy Example 8: 25 Peptides, 5 Peptides per Pool, 3x Coverage (with Sequences)."
ace generate \
--peptides-excel-file ../test/data/25peptide_sequences.xlsx \
--num-peptides-per-pool 5 \
--num-coverage 3 \
--cpsat-solver-num-processes 6 \
--mode cpsat_solver \
--output-excel-file 25peptides_5perpool_3x_seqsim_sat-solver.xlsx
echo ""
--mode golfy \
--output-excel-file outputs/25peptides_5perpool_3x_seqsim_golfy.xlsx

echo "Example 9. Golfy without peptide sequences (90/9/3x)"
# CP-SAT Solver
echo "CP-SAT Solver Example 1: 90 Peptides, 9 Peptides per Pool, 3x Coverage."
ace generate \
--num-peptides 90 \
--num-peptides-per-pool 9 \
--num-coverage 3 \
--mode golfy \
--output-excel-file 90peptides_9perpool_3x_noseqsim_golfy.xlsx \
--cpsat-solver-num-processes 6 \
--mode cpsat_solver \
--output-excel-file outputs/90peptides_9perpool_3x_noseqsim_sat-solver.xlsx \
--assign-well-ids 1 \
--num-plate-wells 96
echo ""

echo "Example 10. SAT solver without peptide sequences (90/9/3x)"
echo "CP-SAT Solver Example 2: 100 Peptides, 5 Peptides per Pool, 3x Coverage."
ace generate \
--num-peptides 90 \
--num-peptides-per-pool 9 \
--num-peptides 100 \
--num-peptides-per-pool 5 \
--num-coverage 3 \
--cpsat-solver-num-processes 6 \
--mode cpsat_solver \
--output-excel-file 90peptides_9perpool_3x_noseqsim_sat-solver.xlsx \
--output-excel-file outputs/100peptides_5perpool_3x_noseqsim_sat-solver.xlsx \
--assign-well-ids 1 \
--num-plate-wells 96
echo ""

echo "Example 11. Golfy without peptide sequences (220/11/3x)"
echo "CP-SAT Solver Example 3: 120 Peptides, 12 Peptides per Pool, 3x Coverage."
ace generate \
--num-peptides 220 \
--num-peptides-per-pool 11 \
--num-peptides 120 \
--num-peptides-per-pool 12 \
--num-coverage 3 \
--mode golfy \
--output-excel-file 220peptides_11perpool_3x_noseqsim_golfy.xlsx \
--cpsat-solver-num-processes 6 \
--mode cpsat_solver \
--output-excel-file outputs/120peptides_12perpool_3x_noseqsim_sat-solver.xlsx \
--assign-well-ids 1 \
--num-plate-wells 96
echo ""

echo "Example 12. SAT solver without peptide sequences (220/11/3x)"
echo "CP-SAT Solver Example 4: 120 Peptides, 8 Peptides per Pool, 3x Coverage."
ace generate \
--num-peptides 220 \
--num-peptides-per-pool 11 \
--num-peptides 120 \
--num-peptides-per-pool 8 \
--num-coverage 3 \
--cpsat-solver-num-processes 6 \
--cpsat-solver-max-peptides-per-block 64 \
--mode cpsat_solver \
--output-excel-file 220peptides_11perpool_3x_noseqsim_sat-solver.xlsx \
--output-excel-file outputs/120peptides_8perpool_3x_noseqsim_sat-solver.xlsx \
--assign-well-ids 1 \
--num-plate-wells 96
echo ""

echo "Example 13. Golfy without peptide sequences (240/12/3x)"
echo "CP-SAT Solver Example 5: 220 Peptides, 11 Peptides per Pool, 3x Coverage."
ace generate \
--num-peptides 240 \
--num-peptides-per-pool 12 \
--num-peptides 220 \
--num-peptides-per-pool 11 \
--num-coverage 3 \
--mode golfy \
--output-excel-file 240peptides_12perpool_3x_noseqsim_golfy.xlsx \
--cpsat-solver-num-processes 6 \
--mode cpsat_solver \
--output-excel-file outputs/220peptides_11perpool_3x_noseqsim_sat-solver.xlsx \
--assign-well-ids 1 \
--num-plate-wells 96
echo""

echo "Example 14. SAT solver without peptide sequences (240/12/3x)"
echo "CP-SAT Solver Example 6: 240 Peptides, 12 Peptides per Pool, 3x Coverage."
ace generate \
--num-peptides 240 \
--num-peptides-per-pool 12 \
--num-coverage 3 \
--cpsat-solver-num-processes 6 \
--mode cpsat_solver \
--output-excel-file 240peptides_12perpool_3x_noseqsim_sat-solver.xlsx \
--output-excel-file outputs/240peptides_12perpool_3x_noseqsim_sat-solver.xlsx \
--assign-well-ids 1 \
--num-plate-wells 96
echo""

echo "CP-SAT Solver Example 7: 25 Peptides, 5 Peptides per Pool, 3x Coverage (with Sequences)."
ace generate \
--peptides-excel-file ../test/data/25peptide_sequences.xlsx \
--num-peptides-per-pool 5 \
--num-coverage 3 \
--cpsat-solver-num-processes 6 \
--mode cpsat_solver \
--output-excel-file outputs/25peptides_5perpool_3x_seqsim_sat-solver.xlsx
12 changes: 0 additions & 12 deletions examples/test.sh

This file was deleted.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number | Diff line number | Diff line change
Expand Up @@ -26,7 +26,7 @@ requires = [

[project]
name = "ace"
version = "0.1.0.0"
version = "0.1.0.1"
requires-python = ">=3.7"
keywords = [
"elispot",
Expand Down
8 changes: 5 additions & 3 deletions src/acelib/block_assignment.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -114,7 +114,7 @@ def get_96_well_ids():
return ['%s%s' % (i[0], i[1]) for i in list(product(row_prefixes, col_prefixes))]
def get_384_well_ids():
row_prefixes = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P']
col_prefixes = range(1, 15)
col_prefixes = range(1, 25)
return ['%s%s' % (i[0], i[1]) for i in list(product(row_prefixes, col_prefixes))]

curr_plate_id = 1
Expand Down Expand Up @@ -147,7 +147,7 @@ def get_384_well_ids():
curr_plate_id += 1
curr_well_id = curr_well_ids[0]
curr_well_ids.pop(0)
self.plate_ids['pool_id'] = (curr_plate_id, curr_well_id)
self.plate_ids[pool_id] = (curr_plate_id, curr_well_id)

def get_peptide_sequence(self, peptide_id: PeptideId) -> PoolId:
"""
Expand Down Expand Up @@ -220,7 +220,9 @@ def to_dataframe(self) -> pd.DataFrame:
else:
data['plate_id'].append('')
data['well_id'].append('')
return pd.DataFrame(data)
df = pd.DataFrame(data)
df.sort_values(by=['peptide_id'], inplace=True)
return df

def is_optimal(
self,
Expand Down
11 changes: 11 additions & 0 deletions src/acelib/cli/cli_generate.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -145,6 +145,14 @@ def add_ace_generate_arg_parser(sub_parsers):
choices=SequenceSimilarityFunctions.ALL,
help="Sequence similarity function (default: %s)." % GENERATE_SEQUENCE_SIMILARITY_FUNCTION
)
parser_optional.add_argument(
"--cluster-peptides",
dest="cluster_peptides",
type=bool,
default=True,
required=False,
help="Cluster peptides if set to true (default: true)."
)

parser_optional_golfy = parser.add_argument_group("optional arguments (applies when '--mode golfy')")
parser_optional_golfy.add_argument(
Expand Down Expand Up @@ -272,10 +280,12 @@ def run_ace_generate_from_parsed_args(args):
# Step 1. Load peptide data
if args.peptides_excel_file is not None:
peptides = convert_dataframe_to_peptides(df_peptides=pd.read_excel(args.peptides_excel_file))
cluster_peptides = args.cluster_peptides
else:
peptides = []
for i in range(1, args.num_peptides + 1):
peptides.append(('peptide_%i' % i, ''))
cluster_peptides = False

# Step 2. Check input parameters
if len(peptides) % args.num_peptides_per_pool != 0 and args.mode == GenerateModes.CPSAT_SOLVER:
Expand All @@ -288,6 +298,7 @@ def run_ace_generate_from_parsed_args(args):
num_peptides_per_pool=args.num_peptides_per_pool,
num_coverage=args.num_coverage,
trained_model_file=resources.path('acelib.resources.models', 'trained_model5.pt'),
cluster_peptides=cluster_peptides,
mode=args.mode,
sequence_similarity_function=args.sequence_similarity_function,
sequence_similarity_threshold=args.sequence_similarity_threshold,
Expand Down
4 changes: 3 additions & 1 deletion src/acelib/deconvolution.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -76,7 +76,9 @@ def to_dataframe(self):
data['pool_ids'].append(';'.join([str(p) for p in pool_ids]))
data['num_coverage'].append(len(pool_ids))
data['deconvolution_result'].append(label)
return pd.DataFrame(data)
df = pd.DataFrame(data)
df.sort_values(by=['peptide_id'], inplace=True)
return df


def convert_to_golfy_spotcounts(
Expand Down
Loading

0 comments on commit ef70f09

Please sign in to comment.