本文整理汇总了C++中dataset类的典型用法代码示例。如果您正苦于以下问题:C++ dataset类的具体用法?C++ dataset怎么用?C++ dataset使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了dataset类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: foreach_well
/// Splits `data` into runs of consecutive records that share the same value in
/// the `id_field` column and calls `fn` once per run.
///
/// Each call receives a freshly built dataset that holds, for every column of
/// `data`, only the records belonging to that run. Grouping is by adjacency
/// only: an id that reappears later starts a new run and triggers another call.
///
/// (The template header declaring `F` is not visible in this excerpt.)
///
/// @param data     Table of named columns; every column is expected to hold at
///                 least as many records as the id column.
/// @param fn       Callable invoked as `fn(well)` for each run.
/// @param id_field Name of the id column, looked up with `data.at()`.
/// @return The callable after all invocations, so callers can read any state it
///         accumulated.
F foreach_well(const dataset& data, F fn, std::string id_field)
{
    const auto& id = data.at(id_field);
    const std::size_t total = id.size();

    std::size_t begin_rec = 0;
    while (begin_rec < total) {
        // Extend the half-open range [begin_rec, end_rec) while the id stays the
        // same. Scanning forward like this also handles a final record whose id
        // differs from the run before it: it becomes its own one-record run
        // instead of being merged into the preceding one.
        std::size_t end_rec = begin_rec + 1;
        while (end_rec < total && id[end_rec] == id[begin_rec])
            ++end_rec;

        dataset well;
        // `const auto&` binds to the container's real value_type and avoids the
        // per-column copy a mismatched pair type would force.
        for (const auto& column : data) {
            well[column.first] = std::vector<std::string>(
                    column.second.data() + begin_rec,
                    column.second.data() + end_rec);
        }

        fn(well);
        begin_rec = end_rec;
    }
    return fn;
}
开发者ID:derrickturk,项目名称:libdca,代码行数:29,代码来源:peakmonth.cpp
示例2: load_training_data
// Loads training instances from `ds` into the model's GSL storage.
//
// At most FLAGS_max_num_examples instances are copied. For each one the target
// value goes into _tv and the features into row i of _fm. The mean and the
// (population) standard deviation of the copied targets are stored in _t_avg
// and _t_std.
//
// Returns 0 on success, -1 if the dataset dimensions are invalid or the
// training buffers cannot be allocated.
int model::load_training_data(const dataset &ds)
{
    int nrow = ds.ins_num();
    const int ncol = ds.fea_num();

    if (nrow <= 0 || ncol < 1) {
        ULIB_FATAL("invalid training data dimensions");
        return -1;
    }

    if (nrow > FLAGS_max_num_examples)
        nrow = FLAGS_max_num_examples;

    if (alloc_training_data(nrow, ncol)) {
        ULIB_FATAL("couldn't allocate training data");
        return -1;
    }

    double tavg = 0;   // running sum of targets
    double tvar = 0;   // running sum of squared targets

    for (int i = 0; i < nrow; ++i) {
        double t = ds.get_tgv(i);
        tavg += t;
        tvar += t*t;
        gsl_vector_set(_tv, i, t);
        for (int j = 0; j < ncol; ++j)
            gsl_matrix_set(_fm, i, j, ds.get_fea(i, j));
    }

    _t_avg = tavg/nrow;

    // E[t^2] - E[t]^2 can come out marginally negative through floating-point
    // cancellation (e.g. all targets equal), which would make sqrt() return NaN.
    // Clamp that case to zero; a genuine NaN input still propagates.
    const double variance = tvar/nrow - _t_avg*_t_avg;
    _t_std = variance < 0 ? 0.0 : sqrt(variance);

    return 0;
}
开发者ID:ZilongTan,项目名称:MachineLearning,代码行数:34,代码来源:model.cpp
示例3: timestep
// Replays a recorded dataset into this object.
//
// Observations recorded at the current timestep are added first and the step is
// announced through timestep(). Then, until this object's current_timestep()
// reaches data.current_timestep(), each iteration adds the recorded control
// (built from data.control() and data.timedelta()), adds the observations for
// the timestep reported afterwards, and announces it. completed() is called at
// the end.
//
// (The template header for this member is not visible in this excerpt.)
void slam::slam_data<ControlModel, ObservationModel>
::add_dataset (const dataset<ControlModel, ObservationModel>& data,
               const typename ControlModel::builder& control_model_builder,
               const typename ObservationModel::builder& obs_model_builder) {

    using namespace boost::adaptors;

    // Adds every observation the dataset holds for timestep `when`.
    auto ingest_observations = [&](timestep_type when) {
        for (const auto& entry : values(data.observations_at(when))) {
            add_observation (entry.id, obs_model_builder(entry.observation));
        }
    };

    ingest_observations (current_timestep());
    timestep (current_timestep());

    for (;;) {
        if (!(current_timestep() < data.current_timestep())) {
            break;
        }

        add_control (control_model_builder (data.control(current_timestep()),
                                            data.timedelta(current_timestep())));
        ingest_observations (current_timestep());
        timestep (current_timestep());
    }

    completed();
}
开发者ID:caomw,项目名称:slam-4,代码行数:25,代码来源:slam_data.hpp
示例4: process
/**
 * Evaluates every cluster count K in [m_kmin, m_kmax) and records the one with
 * the highest average silhouette score.
 *
 * For each K the allocator produces a clustering. If it does not deliver
 * exactly K clusters, NaN is stored for that K and it is skipped. Otherwise the
 * mean of the per-point silhouette scores is appended to p_result.scores(), and
 * p_result's amount/score are updated when it beats the best score so far.
 *
 * @param p_data   Input points.
 * @param p_result Receives the per-K scores and the best K and score.
 * @throws std::invalid_argument if m_kmax exceeds the number of points.
 */
void silhouette_ksearch::process(const dataset & p_data, silhouette_ksearch_data & p_result) {
    if (m_kmax > p_data.size()) {
        // The original message said "should be bigger", the opposite of the check.
        throw std::invalid_argument("K max value '" + std::to_string(m_kmax) +
            "' should not be bigger than amount of objects '" + std::to_string(p_data.size()) + "' in input data.");
    }

    p_result.scores().reserve(m_kmax - m_kmin);

    for (std::size_t k = m_kmin; k < m_kmax; k++) {
        cluster_sequence clusters;
        m_allocator->allocate(k, p_data, clusters);

        if (clusters.size() != k) {
            p_result.scores().push_back(std::nan("1"));
            continue;
        }

        silhouette_data result;
        silhouette().process(p_data, clusters, result);

        // Bind once so begin()/end() are guaranteed to refer to the same
        // sequence whether get_score() returns a reference or a value.
        const auto & point_scores = result.get_score();
        const double score = std::accumulate(point_scores.begin(), point_scores.end(), 0.0) / point_scores.size();
        p_result.scores().push_back(score);

        if (score > p_result.get_score()) {
            p_result.set_amount(k);
            p_result.set_score(score);
        }
    }
}
开发者ID:annoviko,项目名称:pyclustering,代码行数:29,代码来源:silhouette_ksearch.cpp
示例5: generateSet
// Generates a result set from two sets of datapoints: `withNearbyDataset`
// holds the datapoints that have other datapoints in their buffer zone,
// `standaloneDataset` holds those that have none.
//
// All standalone datapoints are kept. From the others, a datapoint is picked
// uniformly at random, added to the result, and removed from the candidates
// together with every datapoint in its buffer zone. This repeats until no
// candidates remain. Neither input set is modified.
dataset generateSet(dataset& withNearbyDataset, dataset& standaloneDataset) {
    random_device rd;
    mt19937 rng(rd());

    dataset remainingDataset(withNearbyDataset.begin(), withNearbyDataset.end());
    dataset resultSet(standaloneDataset.begin(), standaloneDataset.end());

    while (!remainingDataset.empty()) {
        // uniform_int_distribution draws from a CLOSED range, so the upper bound
        // must be size - 1. The previous (0, size) range folded with `% size`
        // made index 0 twice as likely as any other.
        uniform_int_distribution<int> uni(0, static_cast<int>(remainingDataset.size()) - 1);

        // pick a random datapoint by advancing an iterator to the drawn position
        dataset::iterator it = remainingDataset.begin();
        advance(it, uni(rng));

        // Copy the pick and remove it from the candidates before touching its
        // neighbours. Erasing a neighbour equal to the pick would otherwise
        // invalidate `it` while its buffer is still being walked.
        const dataset::value_type picked = *it;
        remainingDataset.erase(it);

        // add picked datapoint to the result
        resultSet.insert(picked);

        // remove all datapoints within the buffer zone that are still candidates
        for (dataset::iterator j = picked.buffer.begin(); j != picked.buffer.end(); ++j) {
            dataset::iterator tmp = remainingDataset.find(*j);
            if (tmp != remainingDataset.end()) {
                remainingDataset.erase(tmp);
            }
        }
    }

    return resultSet;
}
开发者ID:rendro,项目名称:sampleselector,代码行数:38,代码来源:main.cpp
示例6: p_test_set
void experiment_datasets::set_train_test_pairs(const dataset & train, const dataset & test, int pair_num)
{
shared_ptr<dataset> p_test_set(test.clone());
shared_ptr<dataset> p_train_set(train.clone());
train_test_pairs.erase(train_test_pairs.begin(),train_test_pairs.end());
for (int i = 0; i < pair_num; i++)
{
train_test_pairs.push_back(train_test_pair(p_train_set,p_test_set));
}
}
开发者ID:rudaoshi,项目名称:artifact,代码行数:14,代码来源:experiment_datasets.cpp
示例7: update_clusters
void kmeans::update_clusters(const dataset & p_centers, cluster_sequence & p_clusters) {
const dataset & data = *m_ptr_data;
p_clusters.clear();
p_clusters.resize(p_centers.size());
/* fill clusters again in line with centers. */
if (m_ptr_indexes->empty()) {
std::vector<std::size_t> winners(data.size(), 0);
parallel_for(std::size_t(0), data.size(), [this, &p_centers, &winners](std::size_t p_index) {
assign_point_to_cluster(p_index, p_centers, winners);
});
for (std::size_t index_point = 0; index_point < winners.size(); index_point++) {
const std::size_t suitable_index_cluster = winners[index_point];
p_clusters[suitable_index_cluster].push_back(index_point);
}
}
else {
/* This part of code is used by X-Means and in case of parallel implementation of this part in scope of X-Means
performance is slightly reduced. Experiments has been performed our implementation and Intel TBB library.
But in K-Means case only - it works perfectly and increase performance. */
std::vector<std::size_t> winners(data.size(), 0);
parallel_for_each(*m_ptr_indexes, [this, &p_centers, &winners](std::size_t p_index) {
assign_point_to_cluster(p_index, p_centers, winners);
});
for (std::size_t index_point : *m_ptr_indexes) {
const std::size_t suitable_index_cluster = winners[index_point];
p_clusters[suitable_index_cluster].push_back(index_point);
}
}
erase_empty_clusters(p_clusters);
}
开发者ID:annoviko,项目名称:pyclustering,代码行数:35,代码来源:kmeans.cpp
示例8: temp
// Splits the sample indices [0, sample_num) of `data` into `batch_num` batches
// after shuffling them randomly.
//
// Each batch takes ceil(sample_num / batch_num) indices; the last batch takes
// whatever remains. Returns one index vector per batch (always `batch_num`
// entries; trailing batches may be empty when there are fewer samples than the
// rounded-up batches can hold).
//
// NOTE(review): std::random_shuffle was deprecated in C++14 and removed in
// C++17. Switching to std::shuffle needs <random> and a generator, so it is
// left for a file-level change.
vector<vector<int>> random_shuffer_dataset_splitter ::split_impl(const dataset& data) const
{
    vector<vector<int>> batch_ids(batch_num);

    const int sample_num = data.get_sample_num();

    vector<int> temp(sample_num);
    for (int i = 0; i < sample_num; i++)
        temp[i] = i;

    std::random_shuffle ( temp.begin(), temp.end() );

    const int batch_size = ceil(float(sample_num)/batch_num);

    for (int i = 0; i < batch_num; i++)
    {
        // Clamp both ends to sample_num. Because batch_size is rounded up,
        // i*batch_size can run past the data (e.g. 5 samples in 4 batches),
        // which previously produced out-of-range reads and a negative size for
        // the last batch.
        const int wanted_first = i*batch_size;
        const int first = wanted_first < sample_num ? wanted_first : sample_num;

        int last = sample_num;              // the last batch takes the remainder
        if (i != batch_num-1 && first + batch_size < sample_num)
            last = first + batch_size;

        batch_ids[i] = vector<int>(temp.begin() + first, temp.begin() + last);
    }

    return batch_ids;
}
开发者ID:rudaoshi,项目名称:artifact,代码行数:30,代码来源:data_splitter.cpp
示例9: write_dataset
// Writes the contents of a multi-array to `dset`.
//
// Enabled only for types for which is_multi_array<T> holds. The element type's
// HDF5 type id comes from ctype<>, and the data is passed starting at
// value.origin().
//
// (The template header declaring `T` is not visible in this excerpt.)
inline typename boost::enable_if<is_multi_array<T>, void>::type
write_dataset(dataset& dset, T const& value)
{
    typedef typename T::element element_type;

    const hid_t element_type_id = ctype<element_type>::hid();
    dset.write(element_type_id, value.origin());
}
开发者ID:KaiSzuttor,项目名称:h5xx,代码行数:7,代码来源:boost_multi_array.hpp
示例10: read_dataset
// Reads a whole dataset into a multi-array, resizing the array to the
// dataset's extents first.
//
// Enabled only for types for which is_multi_array<T> holds. Raises via
// H5XX_THROW when the rank of the dataset differs from the array's
// dimensionality.
//
// (The template header declaring `T` is not visible in this excerpt.)
typename boost::enable_if<is_multi_array<T>, void>::type
read_dataset(dataset & data_set, T & array)
{
    typedef typename T::element value_type;
    const int array_rank = T::dimensionality;

    // --- a temporary dataspace object gives us the shape of the dataset
    dataspace file_space(data_set);
    if (file_space.rank() != array_rank)
        H5XX_THROW("dataset \"" + get_name(data_set) + "\" and target array have mismatching dimensions");

    const boost::array<hsize_t, array_rank> file_dims = file_space.extents<array_rank>();

    // --- clear the array by shrinking it to zero extents
    //     TODO check if this feature is necessary/wanted
    boost::array<size_t, array_rank> empty_shape;
    empty_shape.assign(0);
    array.resize(empty_shape);

    // --- resize the array to match the dataset
    //     TODO check if this feature is necessary/wanted
    boost::array<size_t, array_rank> target_shape;
    std::copy(file_dims.begin(), file_dims.end(), target_shape.begin());
    array.resize(target_shape);

    // --- read using H5S_ALL for both dataspaces and the default transfer properties
    const hid_t memory_space = H5S_ALL;
    const hid_t file_selection = H5S_ALL;
    const hid_t transfer_plist = H5P_DEFAULT;
    data_set.read(ctype<value_type>::hid(), array.origin(), memory_space, file_selection, transfer_plist);
}
开发者ID:KaiSzuttor,项目名称:h5xx,代码行数:29,代码来源:boost_multi_array.hpp
示例11: update_medians
/* Recomputes one median per cluster and reports how far the medians moved.

   `medians` is replaced by clusters.size() points, each filled in by
   calculate_median() for the corresponding cluster. The return value is the
   largest m_metric distance between a previous median and the new median at the
   same index (0.0 when there are no clusters). */
double kmedians::update_medians(cluster_sequence & clusters, dataset & medians) {
    const std::size_t dimension = (*m_ptr_data)[0].size();

    /* Snapshot of the old medians, needed after `medians` is overwritten. */
    const std::vector<point> previous(medians);

    medians.clear();
    medians.resize(clusters.size(), point(dimension, 0.0));

    double largest_shift = 0.0;

    std::size_t position = 0;
    while (position < clusters.size()) {
        calculate_median(clusters[position], medians[position]);

        const double shift = m_metric(previous[position], medians[position]);
        largest_shift = (shift > largest_shift) ? shift : largest_shift;

        ++position;
    }

    return largest_shift;
}
开发者ID:annoviko,项目名称:pyclustering,代码行数:22,代码来源:kmedians.cpp
示例12: update_clusters
void kmedians::update_clusters(const dataset & medians, cluster_sequence & clusters) {
const dataset & data = *m_ptr_data;
clusters.clear();
clusters.resize(medians.size());
for (size_t index_point = 0; index_point < data.size(); index_point++) {
size_t index_cluster_optim = 0;
double distance_optim = std::numeric_limits<double>::max();
for (size_t index_cluster = 0; index_cluster < medians.size(); index_cluster++) {
double distance = m_metric(data[index_point], medians[index_cluster]);
if (distance < distance_optim) {
index_cluster_optim = index_cluster;
distance_optim = distance;
}
}
clusters[index_cluster_optim].push_back(index_point);
}
erase_empty_clusters(clusters);
}
开发者ID:annoviko,项目名称:pyclustering,代码行数:23,代码来源:kmedians.cpp
示例13: split
// Splits `data` into a group of sub-datasets: split_impl() supplies one list of
// sample indices per batch, and each list is turned into a subset of `data`
// that is appended to the returned group, in order.
dataset_group dataset_splitter::split(const dataset & data) const
{
    vector<vector<int>> batch_ids = this->split_impl(data);

    dataset_group group;
    for (vector<int> & ids : batch_ids)
    {
        group.add_dataset(data.sub_set(ids));
    }
    return group;
}
开发者ID:rudaoshi,项目名称:artifact,代码行数:14,代码来源:data_splitter.cpp
示例14: assign_point_to_cluster
/* Finds the center with the smallest m_metric distance to the point at
   p_index_point (the first such center wins ties) and stores its index in
   p_clusters[p_index_point]. With no centers, index 0 is stored. */
void kmeans::assign_point_to_cluster(const std::size_t p_index_point, const dataset & p_centers, std::vector<std::size_t> & p_clusters) {
    size_t nearest_center = 0;
    double nearest_distance = std::numeric_limits<double>::max();

    size_t center_pos = 0;
    while (center_pos < p_centers.size()) {
        const double candidate = m_metric(p_centers[center_pos], (*m_ptr_data)[p_index_point]);
        if (candidate < nearest_distance) {
            nearest_center = center_pos;
            nearest_distance = candidate;
        }
        center_pos++;
    }

    p_clusters[p_index_point] = nearest_center;
}
开发者ID:annoviko,项目名称:pyclustering,代码行数:15,代码来源:kmeans.cpp
示例15: percent
// Splits the sample indices [0, sample_num) of `data` into groups whose sizes
// follow the proportions in `ratio`, after shuffling the indices randomly.
//
// Each ratio is normalised by the sum of all ratios. Group i receives
// floor(sample_num * share_i) indices, except the last group, which receives
// everything that is left. Returns one index vector per ratio entry.
//
// NOTE(review): std::random_shuffle was deprecated in C++14 and removed in
// C++17. Switching to std::shuffle needs <random> and a generator, so it is
// left for a file-level change.
vector<vector<int>> random_shuffer_ratio_splitter ::split_impl(const dataset& data) const
{
    vector<NumericType> percent(ratio);

    // The initial value fixes the accumulator type. A literal 0 would sum in
    // int and truncate fractional ratios (0.7 + 0.3 -> 0), causing a division
    // by zero below.
    const NumericType total = std::accumulate(ratio.begin(), ratio.end(), NumericType(0));
    BOOST_FOREACH(NumericType & x,percent){ x = x/total; }

    vector<vector<int>> group_ids(percent.size());

    const int sample_num = data.get_sample_num();

    vector<int> temp;
    std::copy(
        boost::counting_iterator<unsigned int>(0),
        boost::counting_iterator<unsigned int>(sample_num),
        std::back_inserter(temp));

    std::random_shuffle ( temp.begin(), temp.end() );

    vector<int>::iterator cur_begin_iter = temp.begin();
    for (size_t i = 0; i < percent.size(); i++)
    {
        int cur_group_size = floor(sample_num * percent[i]);

        // Never step past the end, even if rounding makes the shares add up to
        // slightly more than the number of samples.
        const int remaining = static_cast<int>(temp.end() - cur_begin_iter);
        if (cur_group_size > remaining)
            cur_group_size = remaining;

        vector<int>::iterator cur_end_iter = cur_begin_iter + cur_group_size;
        if (i == percent.size()-1)
        {
            // the last group absorbs whatever the flooring left over
            cur_end_iter = temp.end();
        }

        group_ids[i] = vector<int>(cur_begin_iter, cur_end_iter);
        cur_begin_iter = cur_end_iter;
    }

    return group_ids;
}
开发者ID:rudaoshi,项目名称:artifact,代码行数:44,代码来源:data_splitter.cpp
示例16: chow_liu
/**
 * Constructor which learns a Chow-Liu tree from the given dataset.
 *
 * Builds a complete graph over the variables in which every edge carries the
 * mutual information of its two endpoints together with the pairwise marginal
 * factor learned from the data, runs kruskal_minimum_spanning_tree over that
 * graph, and multiplies the factors of the selected edges into model_.
 *
 * @param X_ Variables over which to learn a tree. If empty, the constructor
 *           returns before building anything.
 * @param ds Dataset to use for computing marginals; asserted to be non-empty.
 * @param params Learning parameters. A negative params.lambda selects the
 *           learn_marginal overload without a lambda argument; otherwise
 *           lambda is passed through (presumably a regularization/smoothing
 *           weight -- confirm against learn_factor). When
 *           params.retain_edge_score_mapping is set, the mutual information of
 *           every candidate edge is stored in edge_score_mapping_.
 */
chow_liu(const forward_range<typename F::variable_type*>& X_,
const dataset<>& ds, const parameters& params = parameters())
: params(params) {
typedef typename F::variable_type variable_type;
assert(ds.size() > 0);
// Materialize the range so the variables can be indexed pairwise below.
std::vector<variable_type*> X(X_.begin(), X_.end());
if (X.size() == 0)
return;
// g will hold weights (mutual information) and factors F for each edge.
typedef std::pair<double, F> edge_mi_pot;
typedef undirected_graph<variable_type*, void_, edge_mi_pot> ig_type;
ig_type g;
foreach(variable_type* v, X)
g.add_vertex(v);
// Visit every unordered pair (i, j) with i < j. X.size() - 1 cannot wrap
// around here because the empty case returned above.
for (size_t i(0); i < X.size() - 1; ++i) {
for (size_t j(i+1); j < X.size(); ++j) {
typename F::domain_type
edge_dom(make_domain<variable_type>(X[i],X[j]));
// Pairwise marginal over (X[i], X[j]) learned from the dataset.
F f((params.lambda < 0 ?
learn_factor<F>::learn_marginal(edge_dom, ds) :
learn_factor<F>::learn_marginal(edge_dom, ds, params.lambda)));
double mi(f.mutual_information(make_domain(X[i]), make_domain(X[j])));
g.add_edge(X[i], X[j], std::make_pair(mi, f));
if (params.retain_edge_score_mapping) {
edge_score_mapping_[edge_dom] = mi;
}
}
}
// Create a MST over the graph g.
// NOTE(review): a Chow-Liu tree maximizes total mutual information, so
// mst_weight_functor presumably negates the stored score -- confirm.
std::vector<F> mst_factors;
kruskal_minimum_spanning_tree
(g, transformed_output(back_inserter(mst_factors),
impl::mst_edge2f_functor<F>(g)),
impl::mst_weight_functor<F>(g));
// Create a decomposable model consisting of the cliques in mst_edges
model_ *= mst_factors;
}
开发者ID:vdeepak13,项目名称:sill,代码行数:46,代码来源:chow_liu.hpp
示例17: ceil
// Splits the sample indices [0, sample_num) of `data` into `batch_num`
// consecutive batches, keeping the original order.
//
// Each batch takes ceil(sample_num / batch_num) indices; the last batch takes
// whatever remains. Returns exactly `batch_num` index vectors (trailing batches
// may be empty when there are fewer samples than the rounded-up batches can
// hold).
vector<vector<int>> ordered_dataset_splitter ::split_impl(const dataset& data) const
{
    vector<vector<int>> batch_ids(batch_num);

    const int sample_num = data.get_sample_num();
    const int batch_size = ceil(float(sample_num)/batch_num);

    for (int i = 0; i < batch_num; i++)
    {
        // Clamp both ends to sample_num. Because batch_size is rounded up,
        // i*batch_size can run past the data (e.g. 5 samples in 4 batches),
        // which previously produced out-of-range ids and a negative size for
        // the last batch.
        const int wanted_first = i*batch_size;
        const int first = wanted_first < sample_num ? wanted_first : sample_num;

        int last = sample_num;              // the last batch takes the remainder
        if (i != batch_num-1 && first + batch_size < sample_num)
            last = first + batch_size;

        // Fill the pre-sized slot. The previous push_back appended after the
        // batch_num empty vectors created above, so callers received
        // 2*batch_num entries with the first half empty.
        vector<int>& cur_batch_id = batch_ids[i];
        if (last > first)
            cur_batch_id.reserve(last - first);
        for (int id = first; id < last; id++)
            cur_batch_id.push_back(id);
    }

    return batch_ids;
}
开发者ID:rudaoshi,项目名称:artifact,代码行数:22,代码来源:data_splitter.cpp
示例18: read_data
// Reads sequences from `in` and appends them to `data`.
//
// Every non-blank line is split on spaces, tabs, CR and LF into one
// observation (the list of its tokens). Consecutive observations form a
// sequence; a line without tokens ends the current sequence. Empty sequences
// are never stored.
void read_data(ifstream & in, dataset & data) {
    string line;
    sequence seq;

    while (getline(in, line)) {
        strtokenizer tok(line, " \t\r\n");
        const int len = tok.count_tokens();

        if (len <= 0) {
            // blank line: close the sequence collected so far
            if (seq.size() > 0) {
                data.push_back(seq);
            }
            seq.clear();
            continue;
        }

        obsr ob;
        for (int i = 0; i < len; i++) {
            ob.push_back(tok.token(i));
        }
        seq.push_back(ob);
    }

    // Input that does not end with a blank line still has a pending sequence;
    // store it instead of silently dropping the last one.
    if (seq.size() > 0) {
        data.push_back(seq);
    }
}
开发者ID:tqtg,项目名称:FlexCRFs,代码行数:24,代码来源:evaluatechk.cpp
示例19: object_handle_
// Constructs an object from a dataset by taking the dataset's native handle.
object::object(const dataset& object_)
    : object_handle_(object_.native_handle())
{}
开发者ID:qbb-project,项目名称:echelon,代码行数:3,代码来源:object.cpp
示例20: predict
// Computes predictions for the test instances in `tds`.
//
// On success *pp receives a newly allocated (ins_num x 2) matrix: column 0
// holds res * _tv + _t_avg, where res = km1 * _ikm and km1 is the kernel matrix
// between the test and training features. Column 1 holds the square root of
// the diagonal of (km2 - res * km1^T) * _t_std^2 + _noise_var, where km2 is the
// test/test kernel matrix. Ownership of that matrix passes to the caller
// (nothing in this function frees it; presumably the caller releases it with
// gsl_matrix_free).
//
// Returns 0 on success and -1 on any failure. On failure *pp is left untouched
// and every temporary is released.
int model::predict(const dataset &tds, gsl_matrix **pp)
{
    // All locals are declared up front because `goto done` must not jump over
    // an initialization.
    int ret = -1;
    gsl_matrix *mat = NULL;   // test features
    gsl_matrix *ptv = NULL;   // result: column 0 and column 1 as described above
    gsl_matrix *km1 = NULL;   // kernel(test, training)
    gsl_matrix *km2 = NULL;   // kernel(test, test), later turned into the variance term
    gsl_matrix *res = NULL;   // km1 * _ikm
    gsl_matrix *stm = NULL;   // res * km1^T
    gsl_vector_view avg_col;
    gsl_vector_view dv;

    if (tds.ins_num() <= 0 || tds.fea_num() != (int)_col_mean->size) {
        ULIB_FATAL("invalid test dimensions, (ins_num=%d,fea_num=%d)",
                   tds.ins_num(), tds.fea_num());
        goto done;
    }

    mat = gsl_matrix_alloc(tds.ins_num(), tds.fea_num());
    if (mat == NULL) {
        ULIB_FATAL("couldn't allocate test feature matrix");
        goto done;
    }

    ptv = gsl_matrix_alloc(tds.ins_num(), 2);
    if (ptv == NULL) {
        ULIB_FATAL("couldn't allocate prediction matrix");
        goto done;
    }

    if (tds.get_matrix(mat)) {
        ULIB_FATAL("couldn't get test matrix");
        goto done;
    }
    dbg_print_mat(mat, "Test Matrix:");

    // Project helpers; presumably they apply the normalization derived from
    // the training data -- confirm against their definitions.
    zero_out_mat(mat);
    norm_mat(mat);
    dbg_print_mat(mat, "Normalized Test Matrix:");

    km1 = comp_kern_mat(mat, _fm, _kern);
    if (km1 == NULL) {
        ULIB_FATAL("couldn't compute test1 kernel matrix");
        goto done;
    }
    dbg_print_mat(km1, "Test Kernel Matrix:");

    km2 = comp_kern_mat(mat, mat, _kern);
    if (km2 == NULL) {
        ULIB_FATAL("couldn't compute test2 kernel matrix");
        goto done;
    }
    // Dump the matrix that was just computed; the previous code printed km1 a
    // second time here.
    dbg_print_mat(km2, "Test Kernel Matrix:");

    // View of km2's diagonal; it is read only after km2 has been updated below.
    dv = gsl_matrix_diagonal(km2);

    res = gsl_matrix_alloc(km1->size1, _ikm->size2);
    if (res == NULL) {
        ULIB_FATAL("couldn't allocate temporary matrix");
        goto done;
    }

    stm = gsl_matrix_alloc(km2->size1, km2->size2);
    if (stm == NULL) {
        ULIB_FATAL("couldn't allocate std matrix");
        goto done;
    }

    // res = km1 * _ikm;  stm = res * km1^T;  km2 -= stm
    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, km1, _ikm, 0.0, res);
    gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, res, km1, 0.0, stm);
    gsl_matrix_sub(km2, stm);
    dbg_print_mat(res, "Predictive Matrix:");

    // Column 0: res * _tv, shifted by the training target mean.
    avg_col = gsl_matrix_column(ptv, 0);
    gsl_blas_dgemv(CblasNoTrans, 1.0, res, _tv, 0.0, &avg_col.vector);
    gsl_vector_add_constant(&avg_col.vector, _t_avg);

    // Column 1: sqrt of the diagonal of km2 after scaling by _t_std^2 and
    // adding _noise_var.
    gsl_matrix_scale(km2, _t_std*_t_std);
    gsl_vector_add_constant(&dv.vector, _noise_var);
    for (size_t i = 0; i < km2->size1; ++i)
        gsl_matrix_set(ptv, i, 1, sqrt(gsl_vector_get(&dv.vector, i)));

    // Hand the result to the caller; clearing ptv keeps the cleanup below from
    // freeing it.
    *pp = ptv;
    ptv = NULL;
    ret = 0;

done:
    gsl_matrix_free(mat);
    gsl_matrix_free(ptv);
    gsl_matrix_free(km1);
    gsl_matrix_free(km2);
    gsl_matrix_free(res);
    gsl_matrix_free(stm);
    return ret;
}
开发者ID:ZilongTan,项目名称:MachineLearning,代码行数:90,代码来源:model.cpp
注:本文中的dataset类示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论