Skip to content

Commit

Permalink
Revision 1 of the 1st Edition
Browse files Browse the repository at this point in the history
  • Loading branch information
rchillyard committed Dec 31, 2024
1 parent bd90aa2 commit e5c4626
Show file tree
Hide file tree
Showing 445 changed files with 51,665 additions and 126 deletions.
22 changes: 18 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# Compiled class file
*.class
target/

# Log file
*.log
# Log files
logs/

# BlueJ files
*.ctxt
Expand All @@ -13,12 +14,25 @@
# Package Files #
*.jar
*.war
*.nar
*.ear
*.zip
*.tar.gz
*.rar

# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
replay_pid*

# IDE files #
*.iml
.idea

bin/*
.classpath
.project
.settings

src/main/java/com/phasmidsoftware/dsaipg/sort/elementary/MergeSortDecisionTree\.java
*.kml

/api/
/SelectBenchmark.csv
334 changes: 213 additions & 121 deletions LICENSE

Large diffs are not rendered by default.

54 changes: 53 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,54 @@
# DSAIPG
Companion repository to "Data Structures, Algorithms and Invariants"
## Introduction and Provenance
Companion repository to:
"Data Structures, Algorithms, and Invariants--A Practical Guide"
by Robin Hillyard, College of Engineering, Northeastern University, Boston, MA, USA.
Published by Cognella.

This is version 1-1 (first revision of the first edition).

## Installation
This project is designed to be cloned from https://github.com/rchillyard/DSAIPG.git
Alternatively, if you will be submitting assignments based on the repository,
then you might want to fork it instead.

This repository contains a Maven project (see the `pom.xml` file in the top level).
Ideally, you will use an IDE that is suited to Maven projects.
I recommend IntelliJ IDEA.

## Navigation
The simplest way to find code is just to use the `Navigate/Class` menu.
It is good at recognizing what you're looking for.
However, note that the first character of all classes is upper case (and you will need to search thus).

Exercises and code relating to the various chapters can be found as given below and under the package `com.phasmidsoftware.dsaipg`:
* Chapters 1 and 2: `misc` and `util` (if any);
* Chapters 3 through 6: `adt`;
* Chapters 7 and 8: `sort` (and `select`);
* Chapters 9 and 10: `graphs`;
* Team Project: `projects`.

Other package directories contain other code.

In order to find TODOs, etc. you should use the TODO tool window.
I recommend adding the following pattern to be recognized as needing attention
(TODO is already a known pattern):
* \bTO BE IMPLEMENTED\b.*

This will make it easy to see where you have to write code.

## Building and Testing
If you have cloned (or forked) the repository into IDEA, it should build the project
for you without much intervention on your part.
You will need at least Java 17 as your SDK.
Recommended: Oracle OpenJDK 18.0.2

To test the installation, run all the tests in `src/test/java`.
There are about a thousand active tests, of which two-thirds should run green.
Don't worry about the failing tests--they fail because there are stubs in the code
that you need to replace with functioning code in many places
(see above in Navigation).

There are also functional tests in the `src/it/java` directory.
However, these take significantly longer to run and are really not necessary.

107 changes: 107 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
<!--
~ Copyright (c) 2018, 2019, 2020. Phasmid Software
-
- NOTE: this POM file is designed to include both unit and integration tests.
- The mechanism has not been entirely tested.
-->

<project>
<modelVersion>4.0.0</modelVersion>
<groupId>edu.neu.coe.mgen</groupId>
<artifactId>INFO6205</artifactId>
<version>1</version>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>17</source>
<target>18</target>
</configuration>
</plugin>
</plugins>
</build>
<properties>
<!-- Keep these in step with the maven-compiler-plugin <source>/<target> configured in <build> above.
     Java release levels from 9 onwards are written as plain integers (17, not 1.17). -->
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>18</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
</properties>
<repositories>
<repository>
<id>OSGeo</id>
<url>https://repo.osgeo.org/repository/release/</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>com.phasmidsoftware</groupId>
<artifactId>args_2.13</artifactId>
<version>1.0.3</version>
</dependency>
<dependency>
<groupId>com.phasmidsoftware</groupId>
<artifactId>number_2.13</artifactId>
<version>1.0.12</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.19.0</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<!-- The following dependency is required ONLY for MyTree and MyTreeTest in lab_1 package. -->
<!-- https://mvnrepository.com/artifact/com.google.guava/guava -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>31.1-jre</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.ini4j/ini4j -->
<dependency>
<groupId>org.ini4j</groupId>
<artifactId>ini4j</artifactId>
<version>0.5.4</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.cucumber</groupId>
<artifactId>cucumber-java</artifactId>
<version>7.8.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.cucumber</groupId>
<artifactId>cucumber-junit</artifactId>
<version>7.8.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.belerweb</groupId>
<artifactId>pinyin4j</artifactId>
<version>2.5.1</version>
</dependency>
<dependency>
<groupId>org.jetbrains</groupId>
<artifactId>annotations</artifactId>
<version>13.0</version>
<scope>compile</scope>
</dependency>
<!-- <dependency>
<groupId>edu.neu.coe.mgen</groupId>
<artifactId>INFO6205</artifactId>
<version>1</version>
</dependency>
-->
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Copyright (c) 2017. Phasmid Software
*/

package com.phasmidsoftware.dsaipg.adt.hashtable;

import com.phasmidsoftware.dsaipg.adt.symbolTable.hashtable.FrequencyCounter;
import org.junit.Test;

import static org.junit.Assert.assertEquals;

/**
 * Functional test that increments a single {@link FrequencyCounter} key roughly a billion times
 * ({@code Integer.MAX_VALUE / 2}) and verifies the reported count along the way and at the end.
 */
public class FrequencyCounterFuncTest {
    @Test
    public void testIncrementVeryLargeCount() {
        final int totalIncrements = Integer.MAX_VALUE / 2;
        final int checkpointInterval = 100000000;
        final String key = "LargeKey";
        final FrequencyCounter<String> counter = new FrequencyCounter<>();

        int performed = 0;
        while (performed < totalIncrements) {
            counter.increment(key);
            performed++;
            // Spot-check the running count after the 1st increment and then every checkpointInterval increments.
            if ((performed - 1) % checkpointInterval == 0) {
                assertEquals(performed, counter.get(key).intValue());
            }
        }

        // The final count must equal the number of increments performed.
        assertEquals((long) totalIncrements, counter.get(key).intValue());
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package com.phasmidsoftware.dsaipg.adt.threesum;

import org.junit.Ignore;
import org.junit.Test;

import java.util.function.Supplier;

import static org.junit.Assert.assertEquals;

/**
 * Functional tests that cross-check the faster three-sum implementations against
 * {@link ThreeSumCubic}: for the same generated input, both must find the same number of triples.
 */
public class ThreeSumFuncTest {

    // NOTE: @Ignore only has an effect on methods that are also annotated with @Test;
    // without @Test the method is not a test at all and is never reported as ignored.
    @Ignore("Slow")
    @Test
    public void testGetTriples3() {
        int[] ints = generateInts(1000);
        assertSameTripleCountAsCubic(ints, new ThreeSumQuadratic(ints));
    }

    @Ignore("Slow")
    @Test
    public void testGetTriples4() {
        int[] ints = generateInts(1500);
        assertSameTripleCountAsCubic(ints, new ThreeSumQuadratic(ints));
    }

    @Test
    public void testGetTriplesC4() {
        // CONSIDER move this into it directory.
        int[] ints = generateInts(1500);
        assertSameTripleCountAsCubic(ints, new ThreeSumQuadraticWithCalipers(ints));
    }

    /**
     * Obtains an input array from a {@code Source}.
     *
     * @param n the first argument passed to {@code Source} (presumably the number of values -- TODO confirm);
     *          the second argument is fixed at 1000 and {@code intsSupplier} is called with 10, as in all tests here.
     * @return the array produced by the supplier.
     */
    private static int[] generateInts(int n) {
        Supplier<int[]> intsSupplier = new Source(n, 1000).intsSupplier(10);
        return intsSupplier.get();
    }

    /**
     * Asserts that {@code target} finds as many triples as {@link ThreeSumCubic} does for {@code ints}.
     * The target is evaluated first and the cubic reference second.
     *
     * @param ints   the input the target was constructed with.
     * @param target the implementation under test.
     */
    private static void assertSameTripleCountAsCubic(int[] ints, ThreeSum target) {
        Triple[] actualTriples = target.getTriples();
        Triple[] cubicTriples = new ThreeSumCubic(ints).getTriples();
        assertEquals(cubicTriples.length, actualTriples.length);
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
* Copyright (c) 2017. Phasmid Software
*/

package com.phasmidsoftware.dsaipg.huskySort.sort.huskySort;

import com.phasmidsoftware.dsaipg.huskySort.sort.huskySortUtils.HuskySequenceCoder;
import com.phasmidsoftware.dsaipg.huskySort.sort.huskySortUtils.HuskySortHelper;
import com.phasmidsoftware.dsaipg.huskySort.util.Config;
import com.phasmidsoftware.dsaipg.huskySort.util.LazyLogger;
import com.phasmidsoftware.dsaipg.huskySort.util.ProcessorDependentTimeout;
import org.apache.log4j.Logger;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.Timeout;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;

/**
 * Integration test that drives {@link HuskySortBenchmark}.
 * These tests belong with the integration tests, not with the unit tests.
 *
 * <p>
 * The expected time for a pure quicksort of N items and M runs is 1.39 k M N lg N (where lg represents log to the base 2).
 * Bear in mind that the Benchmark code does M/10 warmup runs also.
 */

@SuppressWarnings("ALL")
public class BenchmarkIntegrationTest {

    // Pattern handed to HuskySortBenchmarkHelper.splitLineIntoStrings for lines of the Leipzig corpora.
    // NOTE(review): the leading class [~\t] may have been intended as [^\t] -- confirm before changing.
    private final static Pattern REGEX_LEIPZIG = Pattern.compile("[~\\t]*\\t(([\\s\\p{Punct}\\uFF0C]*\\p{L}+)*)");
    private static Logger logger = new LazyLogger(BenchmarkIntegrationTest.class);

    // Shared state, populated once by BeforeClass().
    private static HuskySortBenchmark benchmark;
    private static Config config;
    private static HuskySequenceCoder<String> huskyCoder;

    /**
     * Loads the configuration, creates the benchmark, and selects the sequence coder named by the
     * "huskycoder" entry of the "huskysort" section (falling back to "Unicode").
     *
     * @throws IOException declared for {@code Config.load()}.
     */
    @BeforeClass
    public static void BeforeClass() throws IOException {
        final String section = "huskysort";
        config = Config.load();
        benchmark = new HuskySortBenchmark(config);
        final String version = config.get(section, "version");
        huskyCoder = HuskySortHelper.getSequenceCoderByName(config.get(section, "huskycoder", "Unicode"));
        logger.info("HuskySortBenchmark.main: " + version);
    }

    // Per-test timeout rule built from a 10-second base value and the loaded configuration.
    @Rule
    public Timeout timeoutBuilder = new ProcessorDependentTimeout(10, TimeUnit.SECONDS, config);

    @Test
    public void testStrings10K() throws Exception {
        final String resource = "eng-uk_web_2002_10K-sentences.txt";
        final String[] words = getWordsLeipzig(resource);
        benchmark.benchmarkStringSorters(resource, words, 10000, 3800, huskyCoder);
    }

    /**
     * Reads the words of the named resource, splitting each line with {@link #REGEX_LEIPZIG}
     * and {@code HuskySortBenchmarkHelper.REGEX_STRING_SPLITTER}.
     *
     * @param s the name of the corpus resource.
     * @return the words obtained from the resource.
     * @throws FileNotFoundException declared for the underlying {@code getWords} call.
     */
    private static final String[] getWordsLeipzig(String s) throws FileNotFoundException {
        return HuskySortBenchmarkHelper.getWords(
                s,
                line -> HuskySortBenchmarkHelper.splitLineIntoStrings(line, REGEX_LEIPZIG, HuskySortBenchmarkHelper.REGEX_STRING_SPLITTER));
    }

    @Test
    public void testStrings100K() throws Exception {
        // NOTE: you cannot include insertionSort among the sort methods to be used: it WILL time out here.
        final String resource = "eng-uk_web_2002_100K-sentences.txt";
        final String[] words = getWordsLeipzig(resource);
        benchmark.benchmarkStringSorters(resource, words, 100000, 255, huskyCoder);
    }

    @Test
    public void testDates10K() throws Exception {
        benchmark.sortLocalDateTimes(37000, 1000000);
    }

    @Test
    public void testDates100K() throws Exception {
        benchmark.sortLocalDateTimes(44000, 1000000);
    }

    @Test
    public void testStrings10KInstrumented() throws Exception {
        final String[] words = getWordsLeipzig("eng-uk_web_2002_10K-sentences.txt");
        benchmark.benchmarkStringSortersInstrumented(words, 10000, 950, huskyCoder);
    }

    // Disabled test, retained for reference:
    // @Test(timeout = 140000)
    // public void testStrings100KInstrumented() throws Exception {
    // // NOTE: you cannot include insertionSort among the sort methods to be used: it WILL time out here.
    // huskyCoder = asciiCoder;
    // benchmark.benchmarkStringSortersInstrumented(getWords("eng-uk_web_2002_100K-sentences.txt", line -> getWords(regexLeipzig, line)), 100000, 200, huskyCoder);
    // }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package com.phasmidsoftware.dsaipg.huskySort.sort.huskySort;

import com.phasmidsoftware.dsaipg.huskySort.sort.huskySortUtils.ChineseCharacter;
import org.junit.Test;

import static com.phasmidsoftware.dsaipg.huskySort.sort.huskySort.HuskySortBenchmark.CHINESE_NAMES_CORPUS;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;

/**
 * Integration test that runs pinyin conversion and parsing over every word obtained from
 * {@code CHINESE_NAMES_CORPUS}.
 */
public class ChineseCharacterIntegrationTest {
    /**
     * For each word of the corpus, converts it to pinyin, parses the result, and checks that the
     * parse yields exactly one element per character of the word.
     * Any exception thrown during conversion or parsing fails the test, with the offending word in
     * the message and the original exception attached as the cause.
     */
    @Test
    public void testParsePinyinN() {
        for (final String word : HuskySortBenchmarkHelper.getWords(CHINESE_NAMES_CORPUS, HuskySortBenchmark::lineAsList)) {
            final String[] parsedPinyin;
            try {
                parsedPinyin = ChineseCharacter.parsePinyin(ChineseCharacter.convertToPinyin(word), word.length());
            } catch (final Exception e) {
                // Same failure type and message as fail(...), but keeps the cause so the stack trace is not lost.
                throw new AssertionError("Failed parse pinyin for: " + word + ": " + e.getLocalizedMessage(), e);
            }
            assertEquals(word.length(), parsedPinyin.length);
        }
    }
}
Loading

0 comments on commit e5c4626

Please sign in to comment.