
Python io.mmwrite Function: Code Examples


This article collects typical usage examples of Python's scipy.io.mmwrite function. If you have been wondering what mmwrite does, how to call it, or what real-world usage looks like, the curated examples below should help.



Below are 20 code examples of the mmwrite function, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system surface better Python samples.
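Before diving into the collected examples, here is a minimal, self-contained sketch of the basic mmwrite/mmread round trip (the file name example.mtx and the random matrix are placeholders, not taken from any project below):

import numpy as np
from scipy import sparse
from scipy.io import mmwrite, mmread

# Build a small sparse matrix to persist.
A = sparse.random(5, 4, density=0.3, format='csr', random_state=0)

# Write it in Matrix Market format; the '.mtx' extension is appended if missing.
mmwrite('example.mtx', A, comment='demo matrix')

# Read it back; for sparse input, mmread returns a COO matrix.
B = mmread('example.mtx').tocsr()
assert np.allclose(A.toarray(), B.toarray())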

Example 1: RWWR

def RWWR(alpha, nbBasket, nbReco):
	data = load()
	###############################################################
	# CREATE MODELS
	###############################################################
	print('Create the model based on the training set')
	
	modelRWWR = processing.RandomWalkWithRestartRecoModel(data.getUserItemMatrix(), alpha)
		
	###############################################################
	# SET RECOMMENDATION
	###############################################################
	if nbBasket == -1:
		evalRWWR = processing.Evaluation(modelRWWR, data.getBasketItemList(), nbReco)
	else :
		evalRWWR = processing.Evaluation(modelRWWR, data.getBasketItemList()[:nbBasket], nbReco)
	
	###############################################################
	# LAUNCH RECOMMENDATION + SAVE RESULTS
	###############################################################	
	t = time.time()
	evalRWWR.newEval()
	RWWRTime = time.time()-t
	mmwrite('RWWR_a%s_nb%s'%(alpha, nbBasket),evalRWWR.perf) 
	
	print('RWWR Execution time:', RWWRTime)
	print('Performances:')
	print(evalRWWR.testNames)
	print(evalRWWR.meanPerf())
	evalRWWR.savePerf('RWWR_a%s_nb%s'%(alpha, nbBasket))
	return evalRWWR
Developer: kfrancoi | Project: phd-retailreco | Lines: 31 | Source: Experiences_modif.py


Example 2: genJaccard

def genJaccard(feature_matrix):
   
    jaccard_matrix_pre = []

    # jaccard_matrix_pre is a list of arrays holding the non-zero indices of each article in the corpus
    for i in feature_matrix[0:test_num]:
        indices = np.flatnonzero(i)
        jaccard_matrix_pre.append(indices)
    
    S=sparse.dok_matrix((test_num, test_num))
    t0=time.time()
    numi=0
    for i in jaccard_matrix_pre:
        jnum=0
        for j in jaccard_matrix_pre[0:numi+1]: #decrease number of calculations to n choose 2 instead of n^2
            divisor = float(len(set(i).union(set(j))))
            if divisor != 0:
                actual_jaccard = float(len(set(i).intersection(set(j))))/divisor
                if actual_jaccard != 0 and actual_jaccard!=1:
                    S[numi,jnum] = actual_jaccard
            jnum=jnum+1
        numi = numi+1          
    with open('pickled_minhash/actual_jaccard_matrix_small.mtx', 'wb') as f:
        #size of feature_matrix_large: 1261 x 19043
        io.mmwrite(f, S)
    
    print("TIME to generate jaccard_matrix: {}".format(time.time()-t0))
Developer: rosensts | Project: reuters-articles-machine-learning | Lines: 27 | Source: minhash.py
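Example 2's nested loops run in O(n²) Python-level iterations. A vectorized alternative (a sketch of my own, not from the original project) computes all pairwise Jaccard scores with a single sparse matrix product before the result is written with mmwrite:

import numpy as np
from scipy import sparse
from scipy.io import mmwrite

def jaccard_all_pairs(B):
    # B: sparse matrix, rows = documents, columns = features; binarized below.
    B = sparse.csr_matrix(B).astype(bool).astype(np.int64)
    inter = (B @ B.T).toarray()                  # pairwise intersection sizes
    sizes = np.asarray(B.sum(axis=1)).ravel()    # per-document feature counts
    union = sizes[:, None] + sizes[None, :] - inter
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.where(union > 0, inter / union, 0.0)

# Hypothetical usage mirroring Example 2:
# S = sparse.coo_matrix(jaccard_all_pairs(feature_matrix))
# mmwrite('actual_jaccard_matrix_small.mtx', S)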


Example 3: test1D2

    def test1D2():
        spl = splineRefMat(DIM_1D)
    #    list_r = list(np.random.random(20))
        list_r = [0.1,0.2,0.3]

        nx = 3
        px = 2
        geo = line(n=[nx], p=[px])

        nrb     = geo[0]
        knots   = nrb.knots[0]
        n       = nrb.shape[0]
        p       = nrb.degree[0]
        P       = nrb.points

        M = spl.construct(list_r, p, n, knots)
        from scipy.io import mmwrite
        mmwrite('M.mtx', M)
        R = M.dot(nrb.points[:,0])

        geo = line(n=[nx], p=[px])
        geo.refine(id=0, list_t=[list_r])
        nrb     = geo[0]
        P = np.asarray(nrb.points[:,0])

        assert(np.allclose(P,R))
        print("test1D2: OK")
Developer: sommaric | Project: caid | Lines: 27 | Source: extraction.py


Example 4: run

    def run(self, ratio, input_db, output_mat):
        db = sqlite3.connect(input_db)
        # assume no empty users
        users = db.execute("""SELECT Users.[Id] FROM Users""").fetchall()
        # pick <ratio> of them for training db, pick <ratio/10> of them for test db
        train_ids = []
        test_ids = []

        test_threshold = ratio/10
        train_threshold = test_threshold + ratio
        for u in users:
            rnd = random.random()
            if (rnd <= test_threshold):
                test_ids.append(u[0])
            elif (rnd <= train_threshold):
                train_ids.append(u[0])

        train_matrix = self.data_to_matrix(db, train_ids).tocsc()
        test_matrix = self.data_to_matrix(db, test_ids).tocsc()

        (train_matrix, test_matrix) = self.trim_matrices(train_matrix, test_matrix)

        savemat(output_mat, {'train' : train_matrix,'test' : test_matrix}, oned_as = 'row')
        mmwrite(output_mat + '.train', train_matrix)
        mmwrite(output_mat + '.test', test_matrix)
        print("Done!")
Developer: vosen | Project: Juiz | Lines: 26 | Source: pick_test_db.py


Example 5: __init__

 def __init__(self, programEntities, sim=ssd.correlation):
   cleaner = DataCleaner()
   nusers = len(programEntities.userIndex.keys())
   fin = open("../Data/users.csv", 'rb')
   colnames = fin.readline().strip().split(",")
   self.userMatrix = ss.dok_matrix((nusers, len(colnames) - 1))
   for line in fin:
     cols = line.strip().split(",")
     # consider the user only if he exists in train.csv
     if cols[0] in programEntities.userIndex:
       i = programEntities.userIndex[cols[0]]
       self.userMatrix[i, 0] = cleaner.getLocaleId(cols[1])
       self.userMatrix[i, 1] = cleaner.getBirthYearInt(cols[2])
       self.userMatrix[i, 2] = cleaner.getGenderId(cols[3])
       self.userMatrix[i, 3] = cleaner.getJoinedYearMonth(cols[4])
       self.userMatrix[i, 4] = cleaner.getCountryId(cols[5])
       self.userMatrix[i, 5] = cleaner.getTimezoneInt(cols[6])
   fin.close()
   # normalize the user matrix
   self.userMatrix = normalize(self.userMatrix, norm="l1", axis=0, copy=False)
   sio.mmwrite("../Models/US_userMatrix", self.userMatrix)
   # calculate the user similarity matrix and save it for later
   self.userSimMatrix = ss.dok_matrix((nusers, nusers))
   for i in range(0, nusers):
     self.userSimMatrix[i, i] = 1.0
   for u1, u2 in programEntities.uniqueUserPairs:
     i = programEntities.userIndex[u1]
     j = programEntities.userIndex[u2]
     if (i, j) not in self.userSimMatrix:
       usim = sim(self.userMatrix.getrow(i).todense(),
         self.userMatrix.getrow(j).todense())
       self.userSimMatrix[i, j] = usim
       self.userSimMatrix[j, i] = usim
   sio.mmwrite("../Models/US_userSimMatrix", self.userSimMatrix)
Developer: ChrisBg | Project: mlia-examples | Lines: 34 | Source: BaseData.py


Example 6: make_author_vectors

def make_author_vectors(crawl_fname, doc_vec_fname, auth_vec_fname):
    docs = np.load(doc_vec_fname)
    doc_vecs = docs["vectors"][()]
    # Convert to LIL, because modifying CSR is slow
    doc_vecs = doc_vecs.tolil()
    
    # Create mapping from label (=DOI) to row number (=doc vector)  
    doi2n = dict((l,i) for i,l in enumerate(docs["labels"]))
    
    # Collect authors         
    tree = etree.parse(crawl_fname)
    authors = np.array(list(set(tree.xpath("//author/text()"))))

    # Create empty author vectors
    shape = (len(authors), doc_vecs.shape[1])
    auth_vecs = sp.lil_matrix(shape)     
    
    # Create mapping from authors to row number (=author vector)
    auth2n = dict((a,i) for i,a in enumerate(authors))
    
    ## author to group mapping
    ##auth2group = {}
    
    # Fill author vectors by adding doc vectors 
    for item in tree.findall("//item"):
        author = item.find("author").text
        ##group = item.find("group")
        ##auth2group[author] = group
        url = item.find("url").text
        query = urlparse.urlparse(url).query
        doi = urlparse.parse_qs(query)["doi"][0]
        log.debug(u"DOI={} author={}".format(doi, author))
        
        try:
            auth_vecs[auth2n[author]] += doc_vecs[doi2n[doi]]
        except KeyError:
            log.warning(u"No document with DOI={} for author {}".format(
                doi, author))
            
    auth_vecs = auth_vecs.tocsr()
    
    ##group_labels = [auth2group[auth] for auth in authors]
           
    log.info("saving matrix in Numpy format to " + auth_vec_fname)
    np.savez(auth_vec_fname, 
             vectorizer=docs["vectorizer"],
             vectors=auth_vecs,
             author_labels=authors,
             ##group_labels=group_labels
             ) 
    
    base_fname = splitext(auth_vec_fname)[0]
    
    mm_fname = base_fname + ".mtx"
    log.info("saving matrix in Matrix Market format to " + mm_fname)
    mmwrite(mm_fname, auth_vecs, "IDIScape document vectors", "integer")
    
    label_fname = base_fname + "_labels.txt"
    log.info("saving labels to " + label_fname)
    open(label_fname, "w", "utf8").write(u"\n".join(authors))    
Developer: emsrc | Project: idiscape | Lines: 60 | Source: make_author_vecs.py


Example 7: RW_POP

def RW_POP(alpha, nbBasket, nbReco):
	data = load()
	###############################################################
	# CREATE MODELS
	###############################################################
	print('Create the model based on the training set')
	
	modelRW = processing.BasketRandomWalk_POP(data.getUserItemMatrix(), alpha)
		
	###############################################################
	# SET RECOMMENDATION
	###############################################################
	if nbBasket == -1:
		evalRW = processing.Evaluation(modelRW, data.getBasketItemList(), nbReco)
	else :
		evalRW = processing.Evaluation(modelRW, data.getBasketItemList()[:nbBasket], nbReco)
	
	###############################################################
	# LAUNCH RECOMMENDATION + SAVE RESULTS
	###############################################################	
	t = time.time()
	evalRW.newEval()
	RWTime = time.time()-t
	mmwrite(resultFolder+'RW_POP_a%s_nb%s'%(alpha, nbBasket),evalRW.perf) 
	
	print('RW_POP Execution time:', RWTime)
	print('Performances:')
	print(evalRW.testNames)
	print(evalRW.computePerf())
	evalRW.savePerf(resultFolder+'RW_POP_a%s_nb%s.txt'%(alpha, nbBasket))
	return evalRW
Developer: kfrancoi | Project: phd-retailreco | Lines: 31 | Source: Experiences.py


Example 8: make_doc_vectors

def make_doc_vectors(fname_pat, out_fname):
    fnames = glob(fname_pat)
    labels = [splitext(basename(fn))[0] for fn in fnames]
    stop_words = frozenset(list(ENGLISH_STOP_WORDS) + OTHER_STOPWORDS)
    vectorizer = CountVectorizer(input="filename", 
                                 ngram_range=(1,3),
                                 min_df=5, 
                                 max_df=0.7,
                                 stop_words=stop_words,
                                 token_pattern=r"(?u)\b[A-Za-z]\w+\b")
    vectors = vectorizer.fit_transform(fnames)
    
    log.info("saving matrix in Numpy format to " + out_fname)
    np.savez(out_fname, 
             vectorizer=vectorizer,
             vectors=vectors,
             labels=labels)
    
    base_fname = splitext(out_fname)[0]
    
    mm_fname = base_fname + ".mtx"
    log.info("saving matrix in Matrix Market format to " + mm_fname)
    mmwrite(mm_fname, vectors, "IDIScape document vectors", "integer")

    feat_fname = base_fname + "_features.txt"
    log.info("saving features to " + feat_fname)
    feat_names = vectorizer.get_feature_names() 
    open(feat_fname, "w", "utf8").write(u"\n".join(feat_names))
    
    label_fname = base_fname + "_labels.txt"
    log.info("saving labels to " + label_fname)
    open(label_fname, "w", "utf8").write(u"\n".join(labels))    
Developer: emsrc | Project: idiscape | Lines: 32 | Source: make_doc_vectors.py


Example 9: main

def main():
    """
        Main entry point to script to perform kmeans.

        Returns:

        - `0` or `1` on success or failure respectively.
        - Saves `centroids`, `centroiddict`, and `clusters` in working dir.

    """
    parser = gen_args()
    args = parser.parse_args()
    sessionid = args.sessionid
    data = spio.mmread(args.data).tocsc()
    logger = logging.getLogger(__name__)
    logger.addHandler(logging.StreamHandler())
    if args.verbose:
        logger.setLevel(logging.DEBUG)
    if args.k:
        k = args.k
    kmeans = KMeans(data, k, args.n, args.delta, args.randomcentroids, \
                    args.classical, args.verbose)
    result = kmeans.run()
    clusters = result['clusters']
    centroids = result['centroids']
    centroiddict = result['centroiddict']
    pickle.dump(clusters, open("data_clusters_" + sessionid + '.pck', 'wb'))
    pickle.dump(centroiddict, open("centroid_dict_" + \
                                    sessionid + '.pck', 'wb'))
    spio.mmwrite(open("data_centroids_" + sessionid + '.mtx', 'w'), \
                 centroids, comment="CSC Matrix", field='real')
    logger.info(" %d Clusters Generated ", len(clusters))
    return 0
Developer: eshwaran | Project: matsya | Lines: 33 | Source: kmeans.py
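Example 9 passes comment and field explicitly. For reference, mmwrite's signature in SciPy is mmwrite(target, a, comment='', field=None, precision=None); a minimal sketch of these options (the file name and matrix are placeholders):

import numpy as np
from scipy.io import mmwrite

# field selects the Matrix Market field type ('real', 'integer', 'complex', 'pattern');
# precision controls the number of significant digits written for real values.
mmwrite('centroids.mtx', np.random.rand(4, 3),
        comment='demo centroids', field='real', precision=6)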


Example 10: __init__

  def __init__(self, programEvents):
        
    nevents = len(programEvents.eventIndex.keys())
    self.eventPopularity = ss.dok_matrix((nevents, 5))
    self.eventAttendees = collections.defaultdict(list)
    f = open("/users/chaitanya/PyCharmProjects/EventRec/data/event_attendees.csv", 'rb')
    f.readline() # skip header
    
    for line in f:
      cols = line.strip().split(",")
      eventId = cols[0]

      if eventId in programEvents.eventIndex:
        i = programEvents.eventIndex[eventId]
        self.eventPopularity[i, 0] = len(cols[1].split(" ")) - len(cols[4].split(" "))  # yes count minus no count
        self.eventPopularity[i, 1] = len(cols[3].split(" "))  # number of invited folks

        self.eventAttendees[i].append(cols[1].split(" "))  # list of yes folks
        self.eventAttendees[i].append(cols[2].split(" "))  # list of no folks
        self.eventAttendees[i].append(cols[3].split(" "))  # list of invited folks
                
    f.close()
    
    self.eventPopularity = normalize(self.eventPopularity, norm="l1",axis=0, copy=False)
    sio.mmwrite("/users/chaitanya/PyCharmProjects/EventRec/Models/EA_eventPopularity", self.eventPopularity)
    pickle.dump(self.eventAttendees, open("/users/chaitanya/PyCharmProjects/EventRec/Models/PE_eventAttendees.pkl", 'wb'))
Developer: chaitanyamalaviya | Project: recsys | Lines: 29 | Source: rabbi.py


Example 11: encode

def encode() :
    """
    Generate extra features from pairs, triplets, and common
    quadruplets of the existing features and then save those features
    in a sparse matrix to disk.
    """
    dftrain = load_dataframe('train')
    dftest = load_dataframe('test')
    lentrain = len(dftrain)
    all_data = np.vstack((dftrain.iloc[:, 1:-1], dftest.iloc[:, 1:-1]))
    np.array(dftrain.ACTION).dump('{}/train_truth.dat'.format(ddir))
    
    dp = group_data(all_data, degree=2, remove_unique=True)
    dt = group_data(all_data, degree=3, remove_unique=True)
    dq = group_data(all_data, degree=4, remove_unique=True)
    dq = remove_rare(dq, 15)

    X = all_data[:lentrain]
    X_2 = dp[:lentrain]
    X_3 = dt[:lentrain]
    X_4 = dq[:lentrain]
    X_train_all = np.hstack((X, X_2, X_3, X_4))
    mmwrite('{}/train_encoded'.format(ddir), X_train_all)

    X_test = all_data[lentrain:]
    X_test_2 = dp[lentrain:]
    X_test_3 = dt[lentrain:]
    X_test_4 = dq[lentrain:]
    X_test_all = np.hstack((X_test, X_test_2, X_test_3, X_test_4))
    mmwrite('{}/test_encoded'.format(ddir), X_test_all)
Developer: jamesjohnson92 | Project: kaggle-amazonaccess | Lines: 30 | Source: utility.py


Example 12: get_content_similarity_scores

def get_content_similarity_scores(readmes, dataset_dir, profile="tfidf",
                                  similarity="cos"):
    """Return CSR matrix of similarity_{r,r} for all r in `readmes`.

       `dataset_dir`      the directory where the similarity scores are
       `profile`    bool or tfidf
       `similarity` cos or ijd (inverse Jaccard distance)
    """
    if profile == "tfidf":
        sim_fn = join(dataset_dir, TF_IDF_FN)

    if exists(sim_fn):
        return mmread(sim_fn).tocsr()

    if profile == "bool":
        #readme_words = COUNTVECTORIZER readmes
        pass
    else:
        tfidf = TfidfVectorizer(input='file', #sublinear_tf=True,
                                max_df=0.5, stop_words='english',
                                decode_error="ignore")
        #max_df=0.5: if a word occurs in more than half of the readmes it is
        #            ignored
        readme_words = tfidf.fit_transform(readmes)

    if similarity == "cos":
        similarity_scores = csr_matrix(cosine_similarity(readme_words))
    else:
        # similarity_scores = csr_matrix(ijd(readme_words))
        pass

    mmwrite(sim_fn, similarity_scores, comment=profile+"_"+similarity+"_similarity_{r,r}")
    return similarity_scores
Developer: fenekku | Project: Masters | Lines: 33 | Source: content_similarity_scores.py


Example 13: store_matrix

def store_matrix(matrix='',
                 output_dir_path='',
                 out_file_name='',
                 output_format=''):
    """store_matrix."""
    if not os.path.exists(output_dir_path):
        os.mkdir(output_dir_path)
    full_out_file_name = os.path.join(output_dir_path, out_file_name)
    if output_format == "MatrixMarket":
        if len(matrix.shape) == 1:
            raise Exception(
                "'MatrixMarket' format supports only 2D dimensional array\
                and not vectors")
        else:
            io.mmwrite(full_out_file_name, matrix, precision=None)
    elif output_format == "numpy":
        np.save(full_out_file_name, matrix)
    elif output_format == "joblib":
        joblib.dump(matrix, full_out_file_name)
    elif output_format == "text":
        with open(full_out_file_name, "w") as f:
            if len(matrix.shape) == 1:
                for x in matrix:
                    f.write("%s\n" % (x))
            else:
                raise Exception(
                    "'text' format supports only mono dimensional array\
                    and not matrices")
    logger.info("Written file: %s" % full_out_file_name)
Developer: fabriziocosta | Project: EDeN | Lines: 29 | Source: util.py
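As Example 13's guard indicates, the Matrix Market format only holds 2D arrays. A minimal workaround sketch for persisting a 1D vector (the file name vec.mtx is arbitrary) is to promote it to a single-row matrix first:

import numpy as np
from scipy.io import mmwrite

v = np.arange(5.0)
# mmwrite raises for 1D input, so reshape the vector into a 1 x n matrix.
mmwrite('vec.mtx', v.reshape(1, -1))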


Example 14: SOP

def SOP(alpha, teta, nbBasket, nbReco):
	
	data = load()
	###############################################################
	# CREATE MODELS
	###############################################################
	print('Create the model based on the training set')
	
	modelSOP = processing.SOPRecoModel(data.getUserItemMatrix(), alpha, teta)
	modelSOP.launch()
		
	###############################################################
	# SET RECOMMENDATION
	###############################################################
	if nbBasket == -1:
		evalSOP = processing.Evaluation(modelSOP, data.getBasketItemList(), nbReco)
	else :
		evalSOP = processing.Evaluation(modelSOP, data.getBasketItemList()[:nbBasket], nbReco)
	
	###############################################################
	# LAUNCH RECOMMENDATION + SAVE RESULTS
	###############################################################	
	t = time.time()
	evalSOP.newEval()
	SOPTime = time.time()-t
	mmwrite('SOPPerf_a%s_t%s_nb%s_nr%s'%(alpha,teta,nbBasket,nbReco),evalSOP.perf) 
	
	print('SOP Execution time:', SOPTime)
	print('Performances:')
	print(evalSOP.testNames)
	print(evalSOP.meanPerf())
	evalSOP.savePerf('SOPPerf_a%s_t%s_nb%s_nr%s.txt'%(alpha,teta,nbBasket,nbReco))
	return evalSOP
Developer: kfrancoi | Project: phd-retailreco | Lines: 33 | Source: Experiences_modif.py


Example 15: save_new_ref

def save_new_ref(filename, data):
    """ Saves a new version of the reference data, and backs up the old """
    
    ext = filename.split('.')[-1]
    
    if data is None:
        print("WARNING: Error generating file: %s" % filename)
        print("Skipped... try again.")
        return
    
    if os.path.exists(filename):
        os.system( 'mv %s %s' % (filename, BACKUP_DIR) )
    
    if ext in ['h5', 'lh5']:
        if scipy.sparse.issparse(data):
            data = data.toarray()
        Serializer.SaveData(filename, data)
    elif ext == 'mtx':
        io.mmwrite(filename, data)
    elif ext == 'pkl':
        f = open(filename, 'wb')
        pickle.dump(data, f)  # pickle.dump takes (obj, file), in that order
        f.close()
    else:
        raise ValueError('Could not understand extension (.%s) for %s' % (ext, filename))
    
    return
Developer: AgnesHH | Project: msmbuilder | Lines: 27 | Source: generate_tpt_ref.py
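Example 15 densifies sparse data only for the HDF5 branch; mmwrite itself accepts both kinds of input, writing dense arrays in 'array' format and sparse matrices in 'coordinate' format. A minimal sketch (file names arbitrary):

import numpy as np
from scipy import sparse
from scipy.io import mmwrite

mmwrite('dense.mtx', np.eye(3))                     # stored in 'array' format
mmwrite('sparse.mtx', sparse.eye(3, format='coo'))  # stored in 'coordinate' format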


Example 16: nearest_neighbor_degree_analysis

 def nearest_neighbor_degree_analysis(self, target, method="online"):
     """nearest neighbors' degree
     ref Empirical analysis of web-based user-object bipartite networks, Ming-Sheng Shang et al. 17 June 2010.
     target = 'user' means nearest neighbors' degree for users,
     target = 'item' means nearest neighbors' degree for items.        
     method = 'online' means online calculation,
     method = 'offline' means using offline results.        
     """
     filepath = "./offline_results/nn_degree"
     if method == "online":# online calculation
         tinynum = 0.00000001
         if target == "user":# for user
             self.ui_matrix = self.ui_matrix.tocsc()
             degree = sparse.csc_matrix(self.ui_matrix.sum(0))
             # degree = degree + sparse.csc_matrix(np.ones([1, self.usernum]))*tinynum# to avoid zero division
             nn_degree = sparse.csc_matrix(self.ui_matrix.sum(1).transpose())\
                 .dot(self.ui_matrix)/degree
         elif target == "item":# for item
             self.ui_matrix = self.ui_matrix.tocsr()
             degree = sparse.csr_matrix(self.ui_matrix.sum(1))
             # degree = degree + sparse.csr_matrix(np.ones([self.itemnum, 1]))*tinynum# to avoid zero division
             nn_degree = self.ui_matrix.dot(sparse.csr_matrix(self.ui_matrix.sum(0).transpose()))/degree
         else:
             print("target arg error!")
             sys.exit()
         if target == "user" or target == "item":
             try:
                 io.mmwrite(filepath+"_%s"%target, nn_degree)
             except Exception as e:
                 print(e)
                 sys.exit()
             return nn_degree
Developer: hugochan | Project: DataAnalysis | Lines: 32 | Source: datanalysis.py
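The sparse expression in Example 16 is compact; written out (with notation assumed: \Gamma(u) the set of items rated by user u, and k_u, k_i the user and item degrees), the user-side quantity it computes is the mean degree of u's item neighbors:

k_{nn}(u) = \frac{1}{k_u} \sum_{i \in \Gamma(u)} k_i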


Example 17: buildBaseDB

    def buildBaseDB(self, verticesPath):

        mainDir = os.path.dirname(verticesPath)
        verticesFileName = os.path.basename(verticesPath)
        targetsDBPath = os.path.join(mainDir,self.targetDBFolder)

        if "_vertices" not in verticesFileName:
            print "Error: the vertices file name must have the form of *_vertices.dat"
            return

        if not os.path.isdir(targetsDBPath):
            print "Error: targets folder %s not found"%(targetsDBPath)
            return

        base_file = os.path.join(mainDir, verticesFileName.replace("_vertices","_base"))
	vertices,lookup  = buildbase.read_vertices(verticesPath)

	print "Loading targets..."
	rb,target_names = buildbase.load_targets(targetsDBPath,vertices,lookup)
	print "Making base..."
	base,back = buildbase.make_base(rb,self.cutOff)
	del rb
	print "Saving base..."
	f = open(base_file,'w')
	mmwrite(f,base)
	f.close()
	del base
        print "Saved in %s"%(base_file)
Developer: Iffar | Project: makehuman_datagen | Lines: 28 | Source: maketargetlib.py


Example 18: process_dataset

def process_dataset(name):
    prefix = 'dataset/' + name + '/' + name
    fea = np.loadtxt(prefix + '.fea')
    # transform link
    link_data = np.loadtxt(prefix + '.link')
    link_data = link_data - 1
    reverse_link_data = np.append(link_data[:, 1][:,np.newaxis], link_data[:, 0][:,np.newaxis], axis=1)
    link_data = np.append(link_data, reverse_link_data, axis=0)
    weight = [1]*link_data.shape[0]
    num_inst = fea.shape[0]
    link = sparse.csr_matrix((weight, link_data.transpose()), shape=(num_inst, num_inst))
    # transform label
    gnd = np.loadtxt(prefix + '.gnd')
    lb = preprocessing.LabelBinarizer()
    label = lb.fit_transform(gnd)
    label = label.astype(float)
    # use max component
    g = nx.Graph(link)
    mc = sorted(max(nx.connected_components(g), key=len))  # largest connected component
    link = link[mc, :][:, mc]
    label = label[mc, :]
    fea = fea[mc, :]
    # save
    np.save(prefix + '_fea', fea)
    mmwrite(prefix + '_link', link)
    np.save(prefix + '_label', label)
    np.save(prefix + '_mc', mc)
    return fea, link, label
Developer: shirc | Project: collective-classification | Lines: 28 | Source: dataset.py


Example 19: CondProb

def CondProb(alpha, nbBasket, nbReco):
	data = load()
	###############################################################
	# CREATE MODELS
	###############################################################
	print('Create the model based on the training set')
	
	modelCondProb = processing.CondProbRecoModel(data.getUserItemMatrix(), alpha)
		
	###############################################################
	# SET RECOMMENDATION
	###############################################################
	if nbBasket == -1:
		evalCondProb = processing.Evaluation(modelCondProb, data.getBasketItemList(), nbReco)
	else :
		evalCondProb = processing.Evaluation(modelCondProb, data.getBasketItemList()[:nbBasket], nbReco)
	
	###############################################################
	# LAUNCH RECOMMENDATION + SAVE RESULTS
	###############################################################	
	t = time.time()
	evalCondProb.newEval()
	CondProbTime = time.time()-t
	mmwrite(resultFolder+'CondProb_a%s_nb%s'%(alpha, nbBasket),evalCondProb.perf) 
	
	print('Cond Prob Execution time:', CondProbTime)
	print('Performances:')
	print(evalCondProb.testNames)
	print(evalCondProb.computePerf())
	evalCondProb.savePerf(resultFolder+'CondProb_a%s_nb%s.txt'%(alpha, nbBasket))
	return evalCondProb
Developer: kfrancoi | Project: phd-retailreco | Lines: 31 | Source: Experiences.py


Example 20: df_to_sparse

def df_to_sparse(jour, df_in, Fam, col):

    # INPUTS:
    # jour = day under consideration
    # df_in = name of the input dataframe to process
    # Fam = family under consideration
    # col = columns of the input dataframe to carry over to the output
    # (if col = 0, the whole dataframe is kept)

    # OUTPUT:
    # No return value; the result is saved to disk as a sparse matrix.

    # FUNCTION:
    dossier = str(jour) + '.03.2013'
    path_entree = 'C:\\Users\\lbn\\Documents\\data_frames\\'
    path_sortie = 'C:\\Users\\lbn\\Documents\\data_frames\\sparse_data\\'
    doc_entree = path_entree + dossier + '\\' + df_in + str(jour) + '.pickle'
    doc_sortie = path_sortie + dossier + '\\' + df_in + str(jour) + Fam

    data = read_pickle(doc_entree)
    if type(col) == list:
        data = csr_matrix(data.iloc[:, col], dtype=np.int8)
    else:
        data = csr_matrix(data, dtype=np.int8)
    io.mmwrite(doc_sortie, data)

    print('Conversion done for:', df_in + str(jour) + Fam)
Developer: ben6684 | Project: LBN | Lines: 28 | Source: traduc_df_to_sparse.py



Note: the scipy.io.mmwrite examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by various programmers, and copyright remains with the original authors. Refer to each project's license before redistributing or reusing the code; please do not republish without permission.

