Showing 512 changed files with 18,390 additions and 0 deletions.
@@ -0,0 +1,46 @@
<?xml version="1.0" encoding="UTF-8"?>
<actions>
    <action>
        <actionName>run</actionName>
        <packagings>
            <packaging>jar</packaging>
        </packagings>
        <goals>
            <goal>process-classes</goal>
            <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
        </goals>
        <properties>
            <exec.args>-classpath %classpath com.metablocking.preprocessing.BlocksFromEntityIndexDriverBalanced</exec.args>
            <exec.executable>java</exec.executable>
        </properties>
    </action>
    <action>
        <actionName>debug</actionName>
        <packagings>
            <packaging>jar</packaging>
        </packagings>
        <goals>
            <goal>process-classes</goal>
            <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
        </goals>
        <properties>
            <exec.args>-Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath com.metablocking.preprocessing.BlocksFromEntityIndexDriverBalanced</exec.args>
            <exec.executable>java</exec.executable>
            <jpda.listen>true</jpda.listen>
        </properties>
    </action>
    <action>
        <actionName>profile</actionName>
        <packagings>
            <packaging>jar</packaging>
        </packagings>
        <goals>
            <goal>process-classes</goal>
            <goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
        </goals>
        <properties>
            <exec.args>-classpath %classpath com.metablocking.preprocessing.BlocksFromEntityIndexDriverBalanced</exec.args>
            <exec.executable>java</exec.executable>
        </properties>
    </action>
</actions>
@@ -0,0 +1,60 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.metablocking</groupId>
    <artifactId>MetaBlocking</artifactId>
    <version>1.0</version>
    <packaging>jar</packaging>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.7</maven.compiler.source>
        <maven.compiler.target>1.7</maven.compiler.target>
    </properties>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <version>3.0.2</version>
                <configuration>
                    <archive>
                        <manifest>
                            <addClasspath>true</addClasspath>
                            <mainClass>preprocessing.AfterFilteringByteCounter</mainClass>
                        </manifest>
                    </archive>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.5.1</version>
                <configuration>
                    <source>1.7</source>
                    <target>1.7</target>

                    <excludes>
                        <exclude>blockingGraphBuilding/AllBlockComparisonsDriverBalancedAdvanced.java</exclude>
                    </excludes>
                </configuration>
            </plugin>
        </plugins>
    </build>

    <dependencies>
        <!-- http://mvnrepository.com/artifact/org.apache.hadoop/hadoop-core -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-core</artifactId>
            <version>1.2.0</version>
        </dependency>
        <dependency>
            <groupId>blockingFramework</groupId>
            <artifactId>meta_blocking</artifactId>
            <version>1.0</version>
            <scope>compile</scope>
        </dependency>
    </dependencies>
</project>
MetaBlocking/src/main/java/advanced/AverageWeightCombiner.java (34 additions, 0 deletions)
@@ -0,0 +1,34 @@
/*
 * Copyright (C) 2015 Vasilis Efthymiou <vefthym@ics.forth.gr>
 */
package advanced;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class AverageWeightCombiner extends MapReduceBase implements Reducer<ByteWritable, DoubleWritable, ByteWritable, DoubleWritable> {

    /**
     * Combiner: sums up the partial edge weights emitted for each key.
     * @param key the i,j entity ids of an edge
     * @param values wij, the partial weights of this edge
     * @param output the same key, paired with the summed weight
     */
    public void reduce(ByteWritable key, Iterator<DoubleWritable> values,
            OutputCollector<ByteWritable, DoubleWritable> output, Reporter reporter) throws IOException {
        double totalWeight = 0;
        while (values.hasNext()) {
            totalWeight += values.next().get();
        }
        output.collect(key, new DoubleWritable(totalWeight));
    }

}
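Editor's note: as a quick sanity check of the summing logic, reduce() can be exercised directly, outside Hadoop. The demo class below is not part of this commit; it is a minimal sketch that assumes the old mapred API (Reporter.NULL, an anonymous OutputCollector) and only illustrates the expected behavior.

// Illustrative only, not part of this commit: runs AverageWeightCombiner.reduce()
// locally to show that the partial weights for one edge key are summed.
import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

import advanced.AverageWeightCombiner;

public class AverageWeightCombinerDemo {
    public static void main(String[] args) throws IOException {
        AverageWeightCombiner combiner = new AverageWeightCombiner();
        OutputCollector<ByteWritable, DoubleWritable> collector =
                new OutputCollector<ByteWritable, DoubleWritable>() {
                    public void collect(ByteWritable key, DoubleWritable value) {
                        System.out.println(key + "\t" + value); // prints the summed weight
                    }
                };
        // two partial weights for the same edge key; expected output: 1    0.75
        combiner.reduce(new ByteWritable((byte) 1),
                Arrays.asList(new DoubleWritable(0.5), new DoubleWritable(0.25)).iterator(),
                collector, Reporter.NULL);
    }
}

Note that in AverageWeightDriver below the combiner registration is commented out and the job runs map-only (setNumReduceTasks(0)), so the combiner is currently unused.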
MetaBlocking/src/main/java/advanced/AverageWeightDriver.java (123 additions, 0 deletions)
@@ -0,0 +1,123 @@
/*
 * Copyright (C) 2015 Vasilis Efthymiou <vefthym@ics.forth.gr>
 */
package advanced;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;

public class AverageWeightDriver extends Configured {

    public static void main(String[] args) {
        JobClient client = new JobClient();
        JobConf conf = new JobConf(advanced.AverageWeightDriver.class);

        conf.setJobName("Average Edge Weight using Extended Input"); //used for WEP

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(DoubleWritable.class);

        conf.setInputFormat(SequenceFileInputFormat.class);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputCompressionType(conf, CompressionType.BLOCK);

        conf.set("weightingScheme", args[0]); //one of: CBS, ECBS, JS, EJS, ARCS
        FileInputFormat.setInputPaths(conf, new Path(args[1])); //blocking graph
        FileOutputFormat.setOutputPath(conf, new Path(args[2])); //all unique comparisons with their weight

        conf.setMapperClass(advanced.AverageWeightMapperNewFromCompressed.class);
        //conf.setCombinerClass(advanced.AverageWeightCombiner.class);
        //conf.setReducerClass(advanced.AverageWeightReducer.class);

        conf.setNumReduceTasks(0);

        BufferedReader br2 = null, br3 = null;
        try {
            Path cleanPath = new Path("/user/hduser/numBlocksClean.txt");
            Path dirtyPath = new Path("/user/hduser/numBlocksDirty.txt");
            FileSystem fs = FileSystem.get(new Configuration());
            br2 = new BufferedReader(new InputStreamReader(fs.open(cleanPath)));
            Integer cleanBlocks = Integer.parseInt(br2.readLine());
            conf.setInt("cleanBlocks", cleanBlocks);
            br3 = new BufferedReader(new InputStreamReader(fs.open(dirtyPath)));
            Integer dirtyBlocks = Integer.parseInt(br3.readLine());
            conf.setInt("dirtyBlocks", dirtyBlocks);

            if (args[0].equals("EJS")) {
                Path pt2 = new Path("/user/hduser/validComparisons.txt");
                br2 = new BufferedReader(new InputStreamReader(fs.open(pt2)));
                String validComparisons = br2.readLine();
                conf.set("validComparisons", validComparisons);
            }
        } catch (Exception e) {
            System.err.println(e.toString());
        } finally {
            try {
                if (br2 != null) { br2.close(); }
                if (br3 != null) { br3.close(); }
            } catch (IOException e) {
                System.err.println(e.toString());
            }
        }

        // conf.setCompressMapOutput(true);
        conf.set("mapred.max.tracker.failures", "100"); //before a task tracker gets black-listed
        conf.set("mapred.job.tracker.handler.count", "40");
        conf.setInt("mapred.task.timeout", 10000000); //before a non-reporting task fails

        client.setConf(conf);
        RunningJob job = null;
        try {
            job = JobClient.runJob(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }

        //the following is used only for CNP, CEPTotalOrder, but does not create any overhead (keep it always)
        if (job == null) {
            System.err.println("No job found");
            return;
        }

        try {
            Counters counters = job.getCounters();
            double totalWeight = counters.findCounter(advanced.AverageWeightMapperNewFromCompressed.Weight.WEIGHT_COUNTER).getCounter() / 1000.0;
            long comparisons = counters.findCounter("org.apache.hadoop.mapred.Task$Counter",
                    "MAP_OUTPUT_RECORDS").getCounter();
            Double averageWeight = totalWeight / comparisons;
            Path pt = new Path("/user/hduser/averageWeight.txt");
            FileSystem fs = FileSystem.get(new Configuration());
            BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.create(pt, true)));
            bw.write(averageWeight.toString());
            bw.close();
        } catch (IllegalArgumentException | IOException e) {
            System.err.println(e.toString());
        }
    }

}
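Editor's note: the division by 1000.0 above implies that the mapper accumulates edge weights in a long Hadoop counter by scaling each double weight before incrementing it. AverageWeightMapperNewFromCompressed itself is not shown in this excerpt, so the following is only a minimal sketch of that scaled-counter pattern; the class name and key/value types are illustrative assumptions, not the committed implementation.

// Illustrative sketch only: the scaled-counter pattern that AverageWeightDriver
// relies on (counter value / 1000.0 = total weight). Names below are hypothetical.
package advanced;

import java.io.IOException;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class WeightCounterMapperSketch extends MapReduceBase
        implements Mapper<Text, DoubleWritable, Text, DoubleWritable> {

    public enum Weight { WEIGHT_COUNTER } // mirrors the enum the driver reads back

    public void map(Text comparison, DoubleWritable weight,
            OutputCollector<Text, DoubleWritable> output, Reporter reporter) throws IOException {
        // keep three decimals of precision in a long counter; the driver divides by 1000.0
        reporter.incrCounter(Weight.WEIGHT_COUNTER, (long) (weight.get() * 1000));
        // every output record is one comparison, so MAP_OUTPUT_RECORDS counts the comparisons
        output.collect(comparison, weight);
    }
}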
MetaBlocking/src/main/java/advanced/AverageWeightEJSDriver.java (99 additions, 0 deletions)
@@ -0,0 +1,99 @@
/*
 * Copyright (C) 2015 Vasilis Efthymiou <vefthym@ics.forth.gr>
 */
package advanced;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;

public class AverageWeightEJSDriver extends Configured {

    public static void main(String[] args) {
        JobClient client = new JobClient();
        JobConf conf = new JobConf(advanced.AverageWeightEJSDriver.class);

        conf.setJobName("Average Edge Weight using Extended Input"); //used for WEP

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(DoubleWritable.class);

        conf.setInputFormat(SequenceFileInputFormat.class);
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputCompressionType(conf, CompressionType.BLOCK);

        FileInputFormat.setInputPaths(conf, new Path(args[0])); //EJSFinal
        FileOutputFormat.setOutputPath(conf, new Path(args[1])); //all unique comparisons with their weight

        conf.setMapperClass(advanced.AverageWeightEJSMapper.class);
        //conf.setCombinerClass(advanced.AverageWeightCombiner.class);
        //conf.setReducerClass(advanced.AverageWeightReducer.class);

        conf.setNumReduceTasks(0);

        BufferedReader br = null;
        try {
            Path pt = new Path("/user/hduser/validComparisons.txt");
            FileSystem fs = FileSystem.get(new Configuration());
            br = new BufferedReader(new InputStreamReader(fs.open(pt)));
            String validComparisons = br.readLine();
            conf.set("validComparisons", validComparisons);
        } catch (Exception e) {
            System.err.println(e.toString());
        } finally {
            try {
                if (br != null) { br.close(); }
            } catch (IOException e) {
                System.err.println(e.toString());
            }
        }

        client.setConf(conf);
        RunningJob job = null;
        try {
            job = JobClient.runJob(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }

        //the following is used only for CNP, CEPTotalOrder, but does not create any overhead (keep it always)
        if (job == null) {
            System.err.println("No job found");
            return;
        }

        try {
            Counters counters = job.getCounters();
            double totalWeight = counters.findCounter(advanced.AverageWeightEJSMapper.Weight.WEIGHT_COUNTER).getCounter() / 1000.0;
            long comparisons = counters.findCounter("org.apache.hadoop.mapred.Task$Counter",
                    "MAP_OUTPUT_RECORDS").getCounter();
            Double averageWeight = totalWeight / comparisons;
            Path pt = new Path("/user/hduser/averageWeight.txt");
            FileSystem fs = FileSystem.get(new Configuration());
            BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.create(pt, true)));
            bw.write(averageWeight.toString());
            bw.close();
        } catch (IllegalArgumentException | IOException e) {
            System.err.println(e.toString());
        }
    }

}
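Editor's note: both drivers persist the computed threshold to /user/hduser/averageWeight.txt, and the job name comments say it is "used for WEP" (weighted edge pruning). As a usage note, a later pruning stage would read that value back from HDFS. The snippet below is a hedged sketch of that read, using the same FileSystem calls as the drivers; the class and method names are hypothetical and not part of this commit.

// Illustrative sketch only: reading back the average weight written by the drivers above.
// The HDFS path matches the one used in this commit; the class name is hypothetical.
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class AverageWeightReaderSketch {
    public static double readAverageWeight() throws IOException {
        Path pt = new Path("/user/hduser/averageWeight.txt");
        FileSystem fs = FileSystem.get(new Configuration());
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(pt)));
        try {
            return Double.parseDouble(br.readLine()); // the pruning threshold
        } finally {
            br.close();
        }
    }
}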