package org.talend.mydistance;
import org.talend.dataquality.record.linkage.attribute.AbstractAttributeMatcher;
import org.talend.dataquality.record.linkage.constant.AttributeMatcherType;
/**
* @author scorreia
*
* Example of Matching distance.
*/
public class MyDistance extends AbstractAttributeMatcher {
/**
 * Reports the kind of matcher implemented by this class.
 *
 * @return {@link AttributeMatcherType#CUSTOM}, the type a custom implementation should return
 */
@Override
public AttributeMatcherType getMatchType() {
    // User-defined matchers identify themselves with the CUSTOM type.
    final AttributeMatcherType customType = AttributeMatcherType.CUSTOM;
    return customType;
}
/**
 * Computes the matching weight of two strings from their common prefix.
 * <p>
 * Example of a custom matching distance: two strings are considered to match when their first
 * 4 characters are identical. The weight is the number of identical leading characters divided
 * by the number of characters that count for the comparison: 4, or the length of the longer
 * string when both strings are shorter than 4 characters.
 * <p>
 * Two empty strings are identical and therefore yield 1.0; the plain ratio would be
 * {@code 0 / 0.0}, i.e. {@code NaN}, which is outside the expected [0, 1] range.
 *
 * @param arg0 the first string; must not be null (the nullity check is done by the caller)
 * @param arg1 the second string; must not be null (the nullity check is done by the caller)
 * @return a value between 0.0 (the first characters differ) and 1.0 (the compared prefixes are
 * identical)
 */
@Override
public double getWeight(String arg0, String arg1) {
    // Number of leading characters that must be identical for a full match.
    final int maxChar = 4;

    final int shortest = Math.min(arg0.length(), arg1.length());
    final int longest = Math.max(arg0.length(), arg1.length());

    // Both strings are empty: they are identical, and dividing by zero would give NaN.
    if (longest == 0) {
        return 1.0;
    }

    // Count identical leading characters, never reading past the shorter string.
    final int comparable = Math.min(maxChar, shortest);
    int nbIdenticalChar = 0;
    while (nbIdenticalChar < comparable
            && arg0.charAt(nbIdenticalChar) == arg1.charAt(nbIdenticalChar)) {
        nbIdenticalChar++;
    }

    // When both strings are shorter than maxChar, normalise by the longer one so that two
    // identical short strings still reach 1.0; otherwise normalise by maxChar.
    final int denominator = Math.min(maxChar, longest);
    return nbIdenticalChar / (double) denominator;
}