Commit

added as maven project
vefthym committed Jul 30, 2016
1 parent a648a32 commit aa3a736
Showing 512 changed files with 18,390 additions and 0 deletions.
46 changes: 46 additions & 0 deletions MetaBlocking/nbactions.xml
@@ -0,0 +1,46 @@
<?xml version="1.0" encoding="UTF-8"?>
<actions>
<action>
<actionName>run</actionName>
<packagings>
<packaging>jar</packaging>
</packagings>
<goals>
<goal>process-classes</goal>
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
</goals>
<properties>
<exec.args>-classpath %classpath com.metablocking.preprocessing.BlocksFromEntityIndexDriverBalanced</exec.args>
<exec.executable>java</exec.executable>
</properties>
</action>
<action>
<actionName>debug</actionName>
<packagings>
<packaging>jar</packaging>
</packagings>
<goals>
<goal>process-classes</goal>
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
</goals>
<properties>
<exec.args>-Xdebug -Xrunjdwp:transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath com.metablocking.preprocessing.BlocksFromEntityIndexDriverBalanced</exec.args>
<exec.executable>java</exec.executable>
<jpda.listen>true</jpda.listen>
</properties>
</action>
<action>
<actionName>profile</actionName>
<packagings>
<packaging>jar</packaging>
</packagings>
<goals>
<goal>process-classes</goal>
<goal>org.codehaus.mojo:exec-maven-plugin:1.2.1:exec</goal>
</goals>
<properties>
<exec.args>-classpath %classpath com.metablocking.preprocessing.BlocksFromEntityIndexDriverBalanced</exec.args>
<exec.executable>java</exec.executable>
</properties>
</action>
</actions>
60 changes: 60 additions & 0 deletions MetaBlocking/pom.xml
@@ -0,0 +1,60 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.metablocking</groupId>
<artifactId>MetaBlocking</artifactId>
<version>1.0</version>
<packaging>jar</packaging>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
</properties>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
<configuration>
<archive>
<manifest>
<addClasspath>true</addClasspath>
<mainClass>preprocessing.AfterFilteringByteCounter</mainClass>
</manifest>
</archive>
</configuration>
</plugin>

<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.5.1</version>
<configuration>
<source>1.7</source>
<target>1.7</target>

<excludes>
<exclude>blockingGraphBuilding/AllBlockComparisonsDriverBalancedAdvanced.java</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>

<dependencies>
<!-- http://mvnrepository.com/artifact/org.apache.hadoop/hadoop-core -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>1.2.0</version>
</dependency>
<dependency>
<groupId>blockingFramework</groupId>
<artifactId>meta_blocking</artifactId>
<version>1.0</version>
<scope>compile</scope>
</dependency>
</dependencies>
</project>
34 changes: 34 additions & 0 deletions MetaBlocking/src/main/java/advanced/AverageWeightCombiner.java
@@ -0,0 +1,34 @@
/*
* Copyright (C) 2015 Vasilis Efthymiou <vefthym@ics.forth.gr>
*/
package advanced;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;


public class AverageWeightCombiner extends MapReduceBase implements Reducer<ByteWritable, DoubleWritable, ByteWritable, DoubleWritable> {

/**
 * Combiner that sums the partial weights emitted for each edge,
 * reducing the data shuffled to the reducer.
 * @param key the i,j entity ids of this edge
 * @param values the partial weights wij emitted for this edge
 * @param output the edge key paired with the sum of its partial weights
 */
public void reduce(ByteWritable key, Iterator<DoubleWritable> values,
OutputCollector<ByteWritable, DoubleWritable> output, Reporter reporter) throws IOException {
double totalWeight = 0;
while (values.hasNext()) {
totalWeight += values.next().get();
}
output.collect(key, new DoubleWritable(totalWeight));
}

}
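
The drivers below ship with the combiner registration commented out. For reference, a minimal sketch of how this combiner could be wired into a mapred-API job; AverageWeightReducer is taken from the commented-out driver lines and is assumed here, since it is not part of the excerpt shown:

import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.mapred.JobConf;

public class CombinerWiringSketch {
public static void main(String[] args) {
JobConf conf = new JobConf(CombinerWiringSketch.class);
conf.setOutputKeyClass(ByteWritable.class);
conf.setOutputValueClass(DoubleWritable.class);
//pre-aggregate partial weight sums on the map side, so less data is shuffled
conf.setCombinerClass(advanced.AverageWeightCombiner.class);
//assumed final aggregation step (commented out in the actual drivers)
conf.setReducerClass(advanced.AverageWeightReducer.class);
//note: a combiner only takes effect with at least one reduce task;
//the drivers in this commit use setNumReduceTasks(0) and counters instead
}
}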
123 changes: 123 additions & 0 deletions MetaBlocking/src/main/java/advanced/AverageWeightDriver.java
@@ -0,0 +1,123 @@
/*
* Copyright (C) 2015 Vasilis Efthymiou <vefthym@ics.forth.gr>
*/
package advanced;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;


public class AverageWeightDriver extends Configured {

public static void main(String[] args) {
JobClient client = new JobClient();
JobConf conf = new JobConf(advanced.AverageWeightDriver.class);

conf.setJobName("Average Edge Weight using Extended Input"); //used for WEP

conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(DoubleWritable.class);

conf.setInputFormat(SequenceFileInputFormat.class);
conf.setOutputFormat(SequenceFileOutputFormat.class);
SequenceFileOutputFormat.setOutputCompressionType(conf, CompressionType.BLOCK);

conf.set("weightingScheme", args[0]); //one of: CBS, ECBS, JS, EJS, ARCS
FileInputFormat.setInputPaths(conf, new Path(args[1])); //Blocking Graph
FileOutputFormat.setOutputPath(conf, new Path(args[2])); //All unique comparisons with their weight

conf.setMapperClass(advanced.AverageWeightMapperNewFromCompressed.class);
//conf.setCombinerClass(advanced.AverageWeightCombiner.class);
//conf.setReducerClass(advanced.AverageWeightReducer.class);

conf.setNumReduceTasks(0);

BufferedReader br2 = null, br3 = null;
try{
Path cleanPath=new Path("/user/hduser/numBlocksClean.txt");
Path dirtyPath=new Path("/user/hduser/numBlocksDirty.txt");
FileSystem fs = FileSystem.get(new Configuration());
br2=new BufferedReader(new InputStreamReader(fs.open(cleanPath)));
Integer cleanBlocks = Integer.parseInt(br2.readLine());
conf.setInt("cleanBlocks", cleanBlocks);
br3=new BufferedReader(new InputStreamReader(fs.open(dirtyPath)));
Integer dirtyBlocks = Integer.parseInt(br3.readLine());
conf.setInt("dirtyBlocks", dirtyBlocks);

if (args[0].equals("EJS")) {
Path pt2= new Path("/user/hduser/validComparisons.txt");
br2=new BufferedReader(new InputStreamReader(fs.open(pt2)));
String validComparisons = br2.readLine();
conf.set("validComparisons", validComparisons);
}

}catch(Exception e){
System.err.println(e.toString());
} finally {
try { if (br2 != null) br2.close(); if (br3 != null) br3.close(); }
catch (IOException e) {System.err.println(e.toString());}
}

// conf.setCompressMapOutput(true);
conf.set("mapred.max.tracker.failures", "100"); //before it gets black-listed
conf.set("mapred.job.tracker.handler.count", "40");
conf.setInt("mapred.task.timeout", 10000000); //before the non-reporting task fails




client.setConf(conf);
RunningJob job = null;
try {
job = JobClient.runJob(conf);
} catch (Exception e) {
e.printStackTrace();
}

//the following is used only for CNP and CEPTotalOrder, but adds no overhead, so it is always kept
if (job == null) {
System.err.println("No job found");
return;
}

try {
Counters counters = job.getCounters();
double totalWeight = counters.findCounter(advanced.AverageWeightMapperNewFromCompressed.Weight.WEIGHT_COUNTER).getCounter() / 1000.0;
long comparisons = counters.findCounter("org.apache.hadoop.mapred.Task$Counter",
"MAP_OUTPUT_RECORDS").getCounter();
Double averageWeight = totalWeight / comparisons;
Path pt=new Path("/user/hduser/averageWeight.txt");
FileSystem fs = FileSystem.get(new Configuration());
BufferedWriter br=new BufferedWriter(new OutputStreamWriter(fs.create(pt,true)));
br.write(averageWeight.toString());
br.close();
} catch (IllegalArgumentException | IOException e) {
System.err.println(e.toString());
}
}
}
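
AverageWeightDriver recovers the total weight by dividing the WEIGHT_COUNTER value by 1000.0. Hadoop counters only store long values, so the mapper (not shown in this excerpt) presumably accumulates each double weight as a fixed-point long scaled by 1000. A minimal sketch of that convention, with a hypothetical helper name:

import org.apache.hadoop.mapred.Reporter;

public class WeightCounterSketch {
//mirrors the Weight.WEIGHT_COUNTER enum referenced by the driver
public enum Weight { WEIGHT_COUNTER }

//record an edge weight in a long-valued counter with three decimals of
//precision; the driver divides the final counter by 1000.0 to undo this
static void recordWeight(Reporter reporter, double weight) {
reporter.incrCounter(Weight.WEIGHT_COUNTER, (long) (weight * 1000));
}
}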
99 changes: 99 additions & 0 deletions MetaBlocking/src/main/java/advanced/AverageWeightEJSDriver.java
@@ -0,0 +1,99 @@
/*
* Copyright (C) 2015 Vasilis Efthymiou <vefthym@ics.forth.gr>
*/
package advanced;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;


public class AverageWeightEJSDriver extends Configured {

public static void main(String[] args) {
JobClient client = new JobClient();
JobConf conf = new JobConf(advanced.AverageWeightEJSDriver.class);

conf.setJobName("Average Edge Weight using Extended Input"); //used for WEP

conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(DoubleWritable.class);

conf.setInputFormat(SequenceFileInputFormat.class);
conf.setOutputFormat(SequenceFileOutputFormat.class);
SequenceFileOutputFormat.setOutputCompressionType(conf, CompressionType.BLOCK);

FileInputFormat.setInputPaths(conf, new Path(args[0])); //EJSFinal
FileOutputFormat.setOutputPath(conf, new Path(args[1])); //All unique comparisons with their weight

conf.setMapperClass(advanced.AverageWeightEJSMapper.class);
//conf.setCombinerClass(advanced.AverageWeightCombiner.class);
//conf.setReducerClass(advanced.AverageWeightReducer.class);

conf.setNumReduceTasks(0);

BufferedReader br = null;
try{
Path pt= new Path("/user/hduser/validComparisons.txt");
FileSystem fs = FileSystem.get(new Configuration());
br=new BufferedReader(new InputStreamReader(fs.open(pt)));
String validComparisons = br.readLine();
conf.set("validComparisons", validComparisons);
}catch(Exception e){
System.err.println(e.toString());
} finally {
try { if (br != null) br.close(); }
catch (IOException e) {System.err.println(e.toString());}
}

client.setConf(conf);
RunningJob job = null;
try {
job = JobClient.runJob(conf);
} catch (Exception e) {
e.printStackTrace();
}

//the following is used only for CNP and CEPTotalOrder, but adds no overhead, so it is always kept
if (job == null) {
System.err.println("No job found");
return;
}

try {
Counters counters = job.getCounters();
double totalWeight = counters.findCounter(advanced.AverageWeightEJSMapper.Weight.WEIGHT_COUNTER).getCounter() / 1000.0;
long comparisons = counters.findCounter("org.apache.hadoop.mapred.Task$Counter",
"MAP_OUTPUT_RECORDS").getCounter();
Double averageWeight = totalWeight / comparisons;
Path pt=new Path("/user/hduser/averageWeight.txt");
FileSystem fs = FileSystem.get(new Configuration());
BufferedWriter bw=new BufferedWriter(new OutputStreamWriter(fs.create(pt,true)));
bw.write(averageWeight.toString());
bw.close();
} catch (IllegalArgumentException | IOException e) {
System.err.println(e.toString());
}
}
}
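
Both drivers note that the average weight is used for WEP (Weighted Edge Pruning), which discards blocking-graph edges whose weight falls below the average. A sketch of how a later pruning step could load the threshold written above; the class and method names are illustrative, not part of this commit:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WepThresholdSketch {
//read the average edge weight written by the drivers above; WEP would
//prune every comparison whose weight is below this threshold
static double loadThreshold() throws IOException {
Path pt = new Path("/user/hduser/averageWeight.txt");
FileSystem fs = FileSystem.get(new Configuration());
BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(pt)));
try {
return Double.parseDouble(br.readLine());
} finally {
br.close();
}
}
}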