• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python re2.findall函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中re2.findall函数的典型用法代码示例。如果您正苦于以下问题:Python findall函数的具体用法?Python findall怎么用?Python findall使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了findall函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: __findTagAttributes

 def __findTagAttributes(tag):
     """Collect attribute (name, value) pairs from a raw HTML tag string.

     Three regex passes are run over ``tag``: one accepting optionally
     quoted values, one for single-quoted values and one for
     double-quoted values.  The results are concatenated in that order,
     so the same attribute can appear more than once.

     Each pattern starts with an alternative that matches the opening
     ``<tagname `` without capturing anything, so that part of the tag
     shows up in the result as an ``('', '')`` tuple.

     @tag: the tag text, e.g. ``<a href="x">``
     @return: list of 2-tuples of strings
     """
     # Raw strings keep the backslash-w sequence a regex escape instead of
     # an invalid string escape; the pattern text itself is unchanged.
     double_quoted = r'<\w*[ ]| *(.*?)[ ]*=[ ]*"(.*?)"[ +|>]'
     single_quoted = r"<\w*[ ]| *(.*?)[ ]*=[ ]*'(.*?)'[ +|>]"
     any_quoting = r'''<\w*[ ]| *(.*?)[ ]*=[ ]*["|']?(.*?)["|']?[ +|>]'''

     attributes = re.findall(any_quoting, tag)
     attributes.extend(re.findall(single_quoted, tag))
     attributes.extend(re.findall(double_quoted, tag))
     return attributes
开发者ID:psuedoelastic,项目名称:wapiti,代码行数:7,代码来源:lswww.py


示例2: _unpack

    def _unpack(self, buf):
        """Extract into a list irc messages of a tcp streams.

        Lines starting with ":" are parsed as server messages, all other
        lines as client messages.  Parsed fields are stored in self._sc /
        self._cc, and a copy is appended to self._messages once a client
        NICK or USER command has been seen in this buffer.

        @buf: tcp stream data
        @return: False if the buffer could not be read, otherwise None.
        """
        try:
            lines = cStringIO.StringIO(buf).readlines()
        except Exception:
            log.error("Failed reading tcp stream buffer")
            return False

        # NOTE(review): the prefix character class had been garbled by
        # e-mail obfuscation ("[email protected]"); restored to "!@" as
        # used in nick!user@host prefixes -- confirm against upstream.
        server_pattern = r"(^:[\w+.{}!@|()]+\x20)" + r"([a-zA-Z]+|[0-9]{3})" + r"(\x20.+)"
        # The line terminator class is LF or CR.  The previous "\0x0d"
        # spelled NUL followed by the literal characters "x0d".
        client_pattern = r"([a-zA-Z]+\x20)(.+[\x0a\x0d])"

        # Nothing is recorded until the client has sent NICK or USER.
        logirc = False
        for element in lines:
            if re.match("^:", element) is not None:
                irc_server_msg = re.findall(server_pattern, element)
                if irc_server_msg:
                    prefix, command, params = irc_server_msg[0]
                    self._sc["prefix"] = convert_to_printable(prefix.strip())
                    self._sc["command"] = convert_to_printable(command.strip())
                    self._sc["params"] = convert_to_printable(params.strip())
                    self._sc["type"] = "server"
                    if logirc:
                        self._messages.append(dict(self._sc))
            else:
                irc_client_msg = re.findall(client_pattern, element)
                if irc_client_msg and irc_client_msg[0][0].strip() in self.__methods_client:
                    self._cc["command"] = convert_to_printable(irc_client_msg[0][0].strip())
                    if self._cc["command"] in ["NICK", "USER"]:
                        logirc = True
                    self._cc["params"] = convert_to_printable(irc_client_msg[0][1].strip())
                    self._cc["type"] = "client"
                    if logirc:
                        self._messages.append(dict(self._cc))
开发者ID:CIRCL,项目名称:cuckoo-modified,代码行数:34,代码来源:irc.py


示例3: run

    def run(self):
        """Run extract of printable strings.

        Only tasks whose category is "file" are scanned; any other
        category yields an empty list.  ASCII runs and UTF-16LE runs of
        at least "minchars" characters are collected, optionally only
        when they are NUL-terminated ("nullterminated_only" option).

        @return: list of printable strings.
        @raise CuckooProcessingError: if the sample is missing or unreadable.
        """
        self.key = "strings"
        strings = []

        if self.task["category"] != "file":
            return strings

        if not os.path.exists(self.file_path):
            raise CuckooProcessingError("Sample file doesn't exist: \"%s\"" % self.file_path)

        try:
            # The context manager closes the handle even if read() fails.
            with open(self.file_path, "rb") as sample:
                data = sample.read()
        except (IOError, OSError) as e:
            raise CuckooProcessingError("Error opening file %s" % e)

        nulltermonly = self.options.get("nullterminated_only", True)
        minchars = self.options.get("minchars", 5)

        if nulltermonly:
            apat = "([\x20-\x7e]{" + str(minchars) + ",})\x00"
            upat = "((?:[\x20-\x7e][\x00]){" + str(minchars) + ",})\x00\x00"
        else:
            apat = "[\x20-\x7e]{" + str(minchars) + ",}"
            upat = "(?:[\x20-\x7e][\x00]){" + str(minchars) + ",}"

        strings = re.findall(apat, data)
        # Wide strings are matched as raw byte pairs and decoded afterwards.
        strings.extend(str(ws.decode("utf-16le")) for ws in re.findall(upat, data))

        return strings
开发者ID:Abdullah-Mughal,项目名称:cuckoo-modified-1,代码行数:31,代码来源:strings.py


示例4: do_strings

    def do_strings(self):
        """Dump printable strings of self.memfile to "<memfile>.strings".

        Does nothing unless the "dostrings" option is enabled.  ASCII and
        UTF-16LE runs of at least "strings_minchars" characters are
        extracted, optionally only when NUL-terminated, and written one
        per line.

        @raise CuckooProcessingError: if the memory file cannot be read.
        """
        if not self.voptions.basic.dostrings:
            return

        try:
            # Context manager so the dump handle is always released.
            with open(self.memfile, "rb") as dump:
                data = dump.read()
        except (IOError, OSError) as e:
            raise CuckooProcessingError("Error opening file %s" % e)

        nulltermonly = self.voptions.basic.get("strings_nullterminated_only", True)
        minchars = self.voptions.basic.get("strings_minchars", 5)

        if nulltermonly:
            apat = "([\x20-\x7e]{" + str(minchars) + ",})\x00"
            upat = "((?:[\x20-\x7e][\x00]){" + str(minchars) + ",})\x00\x00"
        else:
            apat = "[\x20-\x7e]{" + str(minchars) + ",}"
            upat = "(?:[\x20-\x7e][\x00]){" + str(minchars) + ",}"

        strings = re.findall(apat, data)
        strings.extend(str(ws.decode("utf-16le")) for ws in re.findall(upat, data))
        # Drop the reference to the dump before building the output text.
        data = None
        with open(self.memfile + ".strings", "w") as out:
            out.write("\n".join(strings))
开发者ID:453483289,项目名称:cuckoo-modified,代码行数:25,代码来源:memory.py


示例5: test_re_findall

 def test_re_findall(self):
     self.assertEqual(re.findall(":+", "abc"), [])
     self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
     self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
     self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
                                                            (":", ":"),
                                                            (":", "::")])
开发者ID:PeterScott,项目名称:pyre2,代码行数:7,代码来源:test_re.py


示例6: handle_data

 def handle_data(self, data):
     """Harvest link candidates from the text of a script element.

     Only acts while self.inscript is set.  Links reported by lamejs
     are added to self.liens, followed by every quoted string literal
     in ``data`` that contains one of the allowed extensions and is not
     listed in self.common_js_strings.

     @data: text found inside the current element
     """
     if not self.inscript:
         return

     allowed_ext = (".php", ".asp", ".xml", ".js", ".json", ".jsp")
     self.liens.extend(lamejs.lamejs(data).getLinks())

     candidates = re.findall(r'"([A-Za-z0-9_=#&%\.\+\?/-]*)"', data)
     candidates += re.findall(r"'([A-Za-z0-9_=#&%\.\+\?/-]*)'", data)
     for jstr in candidates:
         if jstr in self.common_js_strings:
             continue
         # Extensions overlap (".js" is inside ".json" and ".jsp"), so
         # test with any() to add each candidate at most once.
         if any(ext in jstr for ext in allowed_ext):
             self.liens.append(jstr)
开发者ID:psuedoelastic,项目名称:wapiti,代码行数:11,代码来源:lswww.py


示例7: extract_urls

def extract_urls(msg, html=False):
    """Return the set of URLs found in ``msg``.

    For HTML input the quoted-printable "=3D" sequence is turned back
    into "=" and every token listed in REPLACE is removed before
    matching with RE_URL_HTML; plain input is matched with RE_URL_PLAIN
    as-is.  Each hit is decoded and stripped of trailing slashes.
    """
    if not html:
        found = re.findall(RE_URL_PLAIN, msg)
    else:
        msg = msg.replace("=3D", '=')
        for token in REPLACE:
            msg = msg.replace(token, '')
        found = re.findall(RE_URL_HTML, msg)

    pprint(found)
    return set(str(raw.decode()).rstrip("/") for raw in found)
开发者ID:aeppert,项目名称:py-cifsdk,代码行数:15,代码来源:urls.py


示例8: get_schedule_line_groups

def get_schedule_line_groups(classified_event):
    """Return groups of consecutive lines that look like a schedule.

    The tokenized event text is scanned for runs of lines (80 characters
    at most) that contain a time range (run of one line or more) or a
    single time (run of three lines or more).  Runs in which no line has
    a time with minutes are discarded.

    Returns a list of lists of lines.
    """
    text = classified_event.processed_text.get_tokenized_text()

    # (?!20[01][05])
    time = r'\b[012]?\d[:.,h]?(?:[0-5][05])?(?:am|pm)?\b'
    time_with_minutes = r'\b[012]?\d[:.,h]?(?:[0-5][05])(?:am|pm)?\b'
    time_to_time = r'%s ?(?:to|do|до|til|till|alle|a|-|–|[^\w,.]) ?%s' % (time, time)

    # A trailing non-schedule line guarantees every run is terminated
    # by a line that fails the match, which is what triggers collection.
    lines = text.split('\n') + ['']
    total = len(lines)
    candidate_runs = []

    def looks_scheduled(line, pattern, ignored):
        # A line qualifies when it is short and has a non-ignored hit.
        hits = [hit for hit in re.findall(pattern, line) if hit not in ignored]
        return bool(hits) and len(line) <= 80

    def consume_run(start, pattern, min_length, ignored=()):
        # Advance over qualifying lines; keep the run if long enough.
        pos = start
        while pos < total and looks_scheduled(lines[pos], pattern, ignored):
            pos += 1
        if pos < total and pos - start >= min_length:
            candidate_runs.append(lines[start:pos])
        return pos

    cursor = 0
    while cursor < total:
        cursor = consume_run(cursor, time_to_time, 1)
        # TODO(lambert): Somehow track "1)" that might show up here? :(
        cursor = consume_run(cursor, time, 3, ignored=('1.', '2.'))
        cursor += 1

    return [
        run for run in candidate_runs
        if any(re.search(time_with_minutes, line) for line in run)
    ]
开发者ID:mikelambert,项目名称:dancedeets-monorepo,代码行数:48,代码来源:event_structure.py


示例9: on_call

 def on_call(self, call, process):
     """Mark a call that writes ransom-note-like text to a text/HTML file.

     Calls from whitelisted processes are ignored.  The call is marked
     when the lower-cased buffer is at least 128 characters long, the
     file path ends in .txt/.htm/.html and more than one indicator
     match is found in the buffer.
     """
     if process["process_name"].lower() in self.whitelistprocs:
         return

     arguments = call["arguments"]
     buff = arguments["buffer"].lower()
     if len(buff) < 128:
         return
     if not arguments["filepath"].endswith((".txt", ".htm", ".html")):
         return

     hits = re.findall("|".join(indicators), buff)
     if len(hits) > 1:
         self.mark_call()
开发者ID:RicoVZ,项目名称:community,代码行数:7,代码来源:ransomware_message.py


示例10: handleEvent

    def handleEvent(self, event):
        """Report error-message patterns found in spidered content.

        Events not produced by "sfp_spider", and content whose source
        URL is not on the target, are ignored.  For every group in
        regexps not yet recorded for the source URL, an ERROR_MESSAGE
        event is raised the first time one of its patterns matches.
        Always returns None.
        """
        event_type = event.eventType
        origin_module = event.module
        content = event.data

        # We only want web content from the target
        if origin_module != "sfp_spider":
            return None

        page_url = event.sourceEvent.data
        self.sf.debug("Received event, " + event_type + ", from " + origin_module)

        if page_url not in self.results:
            self.results[page_url] = list()

        # We only want web content for pages on the target site
        on_target = self.getTarget().matches(self.sf.urlFQDN(page_url))
        if not on_target:
            self.sf.debug("Not collecting web content information for external sites.")
            return None

        for group_name in regexps.keys():
            if group_name in self.results[page_url]:
                continue

            for regex in regexps[group_name]:
                compiled = re.compile(regex, re.IGNORECASE)
                if len(re.findall(compiled, content)) == 0:
                    continue
                # An earlier pattern of this group may already have hit.
                if group_name in self.results[page_url]:
                    continue
                self.sf.info("Matched " + group_name + " in content from " + page_url)
                self.results[page_url].append(group_name)
                self.notifyListeners(SpiderFootEvent("ERROR_MESSAGE", group_name,
                                                     self.__name__, event.sourceEvent))

        return None
开发者ID:ITh4cker,项目名称:spiderfoot,代码行数:35,代码来源:sfp_errors.py


示例11: on_complete

    def on_complete(self):
        """Rebuild URL-like IOCs from self.iocs and record them.

        Every regex match is re-assembled from its groups, unique values
        are appended to self.data as {"ioc": value} in first-seen order.

        @return: True if any candidate produced a match, else False.
        """
        url_patterns = [
            r'(https?:\/\/)?([\da-z\.-]+)\.([0-9a-z\.]{2,6})(:\d{1,5})?([\/\w\.-]*)\/?',
        ]
        unique_iocs = []
        found_any = False

        for candidate in self.iocs:
            for pattern in url_patterns:
                hits = re.findall(pattern, candidate)
                if not hits:
                    continue
                found_any = True
                for groups in hits:
                    # The period between the second and third group is
                    # consumed by the pattern, so put it back by hand.
                    rebuilt = "".join(
                        (part + ".") if position == 2 else part
                        for position, part in enumerate(groups, 1)
                    )
                    if rebuilt not in unique_iocs:
                        unique_iocs.append(rebuilt)

        for rebuilt in unique_iocs:
            self.data.append({"ioc": rebuilt})

        return found_any
开发者ID:zpriddy,项目名称:cuckoo-modified,代码行数:32,代码来源:encrypted_ioc.py


示例12: find_competitor_list

def find_competitor_list(search_text):
    """Return the numbered list in ``search_text`` if it looks like a
    competitor list, otherwise None.

    A numbered list starting at 1 and 2 is located first.  It is
    rejected when it contains many times or event keywords, or when the
    text carries a WRONG_NUMBERED_LIST token.  It is accepted when it is
    longer than ten lines, or when enough lines contain a dash or an
    opening parenthesis, crew/style words, or are short enough on average.
    """
    processed_text = grammar_matcher.StringProcessor(search_text)
    results_match = re.search(
        r'\n0*1[^\d].+\n^0*2[^\d].+\n(?:^\d+.+\n){2,}', processed_text.text, re.MULTILINE
    )
    if not results_match:
        return None

    numbered_list = results_match.group(0)
    num_lines = numbered_list.count('\n')

    time_hits = re.findall(r'\d ?[.:h] ?\d\d|\bam\b|\bpm\b', numbered_list)
    if len(time_hits) > num_lines / 4:
        return None  # good list of times! workshops, etc! performance/shows/club-set times!

    processed_numbered_list = grammar_matcher.StringProcessor(
        numbered_list, processed_text.match_on_word_boundaries
    )
    event_keywords = processed_numbered_list.get_tokens(rules.EVENT)
    if len(event_keywords) > num_lines / 8:
        return None
    if processed_text.has_token(keywords.WRONG_NUMBERED_LIST):
        return None

    if num_lines > 10:
        return numbered_list

    lines = numbered_list.split('\n')

    qualified_lines = sum(1 for entry in lines if re.search(r'[^\d\W].*[-(]', entry))
    if qualified_lines > num_lines / 2:
        return numbered_list

    for style_pattern in ('crew', 'pop|boog', 'lock', r'b\W?(?:boy|girl)'):
        qualified_lines = sum(1 for entry in lines if re.search(style_pattern, entry))
        if qualified_lines > num_lines / 8:
            return numbered_list

    if processed_text.match_on_word_boundaries == regex_keywords.WORD_BOUNDARIES:  # maybe separate on kana vs kanji?
        word_total = sum(len(entry.split(' ')) for entry in lines)
        if 1.0 * word_total / num_lines < 3:
            return numbered_list

    return None
开发者ID:mikelambert,项目名称:dancedeets-monorepo,代码行数:30,代码来源:event_structure.py


示例13: parsenamedacts

def parsenamedacts(pattern, intext):
    """Encode every named act referenced in ``intext``.

    Each distinct value matched by ``pattern`` is looked up after the
    "ref-namedact-" marker and replaced there by encode_act(value).
    Returns the rewritten text.
    """
    marker = r'ref-namedact-'
    outtext = intext
    # A set makes sure each act is rewritten only once.
    for act_name in set(re.findall(pattern, intext)):
        outtext = outtext.replace(marker + act_name, marker + encode_act(act_name))
    return outtext
开发者ID:aih,项目名称:uscites,代码行数:8,代码来源:autoparser.py


示例14: on_call

    def on_call(self, call, process):
        """Track the API calls used to recognise Locky-like behaviour.

        Depending on call["api"] this records hashed volume names,
        inspects CryptHashData buffers for the query string, volume
        names or payment URLs, matches opened event names against the
        known hashes and collects ".php" URLs passed to
        InternetCrackUrlA by the process that was flagged.

        @call: dict describing the hooked API call
        @process: dict describing the calling process
        """
        api = call["api"]

        if self.checkEvent and self.lastapi == "CryptHashData":
            if api == "NtOpenEvent":
                event = self.get_argument(call, "EventName").split("\\")
                if len(event) == 2:
                    if event[1] in self.hashes and event[0] in ["Global", "Local"]:
                        self.found = True

        if api == "GetVolumeNameForVolumeMountPointW":
            if call["status"]:
                name = self.get_argument(call, "VolumeName")
                if name and len(name) > 10:
                    # Keep only the part between the 10-char prefix and
                    # the final character.
                    name = name[10:-1]
                    if name not in self.volumes:
                        self.volumes.add(name)
                        md5 = hashlib.md5(name).hexdigest()[:16].upper()
                        self.hashes.add(md5)

        elif api == "CryptHashData":
            if self.hashes:
                buf = self.get_argument(call, "Buffer")
                if buf and all(word in buf for word in self.keywords):
                    # Pre-bind so the check below is safe when parsing fails.
                    args = None
                    # Try/Except handles when this behavior changes in the future
                    try:
                        args = parse_qs(urlparse("/?" + buf).query,
                                        keep_blank_values=True)
                    except Exception:
                        self.sigchanged = True
                        self.severity = 1
                        self.description = "Potential Locky ransomware behavioral characteristics observed. (See Note)"
                        self.data.append({"Note": "Unexpected behavior observed for Locky. Please "
                                                  "report this sample to https://github.com/spende"
                                                  "rsandbox/community-modified/issues"})

                    if args and "id" in args:
                        if args["id"][0] in self.hashes:
                            self.found = process["process_id"]
                        if "affid" in args:
                            tmp = {"Affid": args["affid"][0]}
                            if tmp not in self.data:
                                self.data.append(tmp)

                elif buf in self.volumes and self.lastapi == "GetVolumeNameForVolumeMountPointW":
                    # Must be stored on the instance: the flag is read
                    # as self.checkEvent at the top of this method.
                    self.checkEvent = True

                elif buf:
                    # Only scan real buffers; a missing one cannot be searched.
                    check = re.findall(r"\s((?:https?://)?\w+(?:\.onion|\.tor2web)[/.](?:\w+\/)?)",
                                       buf, re.I)
                    for payment in check:
                        self.payment.add(payment)

        elif api == "InternetCrackUrlA":
            if self.found and process["process_id"] == self.found:
                url = self.get_argument(call, "Url")
                if url and url.endswith(".php"):
                    self.c2s.add(url)
开发者ID:Magicked,项目名称:community-modified,代码行数:58,代码来源:locky_apis.py


示例15: on_complete

    def on_complete(self):
        """Mark screenshots whose OCR text looks like a ransom message.

        A screenshot is marked when more than one indicator match is
        found in its lower-cased "ocr" text.

        @return: the result of self.has_marks().
        """
        for shot in self.get_results("screenshots", []):
            if "ocr" not in shot:
                continue
            ocr = shot["ocr"].lower()
            hits = re.findall("|".join(indicators), ocr)
            if len(hits) > 1:
                self.mark_ioc("message", ocr)

        return self.has_marks()
开发者ID:RicoVZ,项目名称:community,代码行数:9,代码来源:ransomware_message.py


示例16: on_call

 def on_call(self, call, process):
     """Record raw-disk writes whose buffer looks like a ransom message.

     Only NtWriteFile calls are inspected.  The handle name is added to
     self.ransomfile (once) when it refers to physicaldrive0 or a
     \\device\\harddisk path, the lower-cased buffer is at least 128
     characters long and more than one indicator match is found in it.
     """
     if call["api"] != "NtWriteFile":
         return

     buff = self.get_raw_argument(call, "Buffer").lower()
     filepath = self.get_raw_argument(call, "HandleName")
     patterns = "|".join(self.indicators)

     target = filepath.lower()
     raw_disk = target == "\\??\\physicaldrive0" or target.startswith("\\device\\harddisk")
     if not raw_disk or len(buff) < 128:
         return

     if len(re.findall(patterns, buff)) > 1 and filepath not in self.ransomfile:
         self.ransomfile.append(filepath)
开发者ID:jgajek,项目名称:community-modified,代码行数:10,代码来源:ransomware_message.py


示例17: do_strings

    def do_strings(self):
        """Dump printable strings of self.memfile, optionally zipped.

        Does nothing unless the "dostrings" option is enabled.  ASCII and
        UTF-16LE runs of at least "strings_minchars" characters are
        written one per line to "<memfile>.strings".  When the
        "zipstrings" option is set, that file is replaced by
        "<memfile>.strings.zip".

        @raise CuckooProcessingError: if the memory file cannot be read
            or the zip archive cannot be created.
        """
        if not self.voptions.basic.dostrings:
            return

        try:
            # Binary mode: a memory dump is not text.
            with open(self.memfile, "rb") as dump:
                data = dump.read()
        except (IOError, OSError) as e:
            raise CuckooProcessingError("Error opening file %s" % e)

        nulltermonly = self.voptions.basic.get("strings_nullterminated_only", True)
        minchars = self.voptions.basic.get("strings_minchars", 5)

        if nulltermonly:
            apat = "([\x20-\x7e]{" + str(minchars) + ",})\x00"
            upat = "((?:[\x20-\x7e][\x00]){" + str(minchars) + ",})\x00\x00"
        else:
            # Without the option, neither pattern requires a trailing NUL.
            apat = "[\x20-\x7e]{" + str(minchars) + ",}"
            upat = "(?:[\x20-\x7e][\x00]){" + str(minchars) + ",}"

        strings = re.findall(apat, data)
        strings += [str(ws.decode("utf-16le")) for ws in re.findall(upat, data)]
        # Drop the reference to the dump before building the output text.
        data = None

        # Both modes write next to the memory file; the previously used
        # name dmp_path is not defined in this method.
        strings_path = self.memfile + ".strings"
        with open(strings_path, "w") as out:
            out.write("\n".join(strings))

        if self.voptions.basic.zipstrings:
            try:
                with zipfile.ZipFile("%s.zip" % (strings_path), "w", allowZip64=True) as archive:
                    archive.write(strings_path, os.path.basename(strings_path), zipfile.ZIP_DEFLATED)
                os.remove(strings_path)
                strings_path = "%s.zip" % (strings_path)
            except Exception as e:
                raise CuckooProcessingError("Error creating Process Memory Strings Zip File %s" % e)
开发者ID:kevross33,项目名称:cuckoo-modified,代码行数:41,代码来源:memory.py


示例18: is_workshop

def is_workshop(classified_event):
    """Decide whether the event looks like a dance class or workshop.

    Title and description tokens are checked against the class, dance,
    crew and wrong-style rules.  Returns a ``(bool, reason)`` tuple; the
    reason names the first rule combination that matched, or 'nothing'.
    """
    trimmed_title = classified_event.processed_title.delete_with_rule(rules.WRONG_CLASS)
    class_rule = (
        rules.ROMANCE_EXTENDED_CLASS_ONLY
        if classified_event.processed_text.get_tokens(dance_keywords.ROMANCE)
        else dance_keywords.CLASS_ONLY
    )
    class_title = trimmed_title.get_tokens(class_rule)
    dance_class_title = trimmed_title.has_token(rules.GOOD_DANCE_CLASS)

    non_dance_title = classified_event.processed_title.has_token(keywords.BAD_COMPETITION_TITLE_ONLY)
    dance_title = trimmed_title.has_token(rules.GOOD_DANCE)
    crew_title = trimmed_title.has_token(rules.MANUAL_DANCER[grammar.STRONG_WEAK])

    wrong_style_title = classified_event.processed_title.has_token(all_styles.DANCE_WRONG_STYLE_TITLE)

    final_title = classified_event.processed_title.get_tokenized_text()
    lee_lee_hiphop = 'lee lee' in final_title and re.findall(r'hip\W?hop', final_title)

    trimmed_text = classified_event.processed_text.delete_with_rule(rules.WRONG_CLASS)
    dance_class_text = trimmed_text.has_token(rules.GOOD_DANCE_CLASS)
    dance_text = classified_event.processed_text.has_token(rules.GOOD_DANCE)
    wrong_style_text = classified_event.processed_text.has_token(all_styles.DANCE_WRONG_STYLE_TITLE)

    crew_text = classified_event.processed_text.has_token(rules.MANUAL_DANCER[grammar.STRONG])

    # The rules are tried in order; the first one that holds wins.
    if class_title and (dance_title or crew_title) and not wrong_style_title:
        return (
            True, 'has class with strong class-title: %s %s' % (class_title, (dance_title or crew_title))
        )
    if (classified_event.is_dance_event() and dance_title and crew_title
            and not wrong_style_title and not non_dance_title):
        return (True, 'has class with strong style-title: %s %s' % (dance_title, crew_title))
    if (classified_event.is_dance_event() and lee_lee_hiphop
            and not wrong_style_title and not non_dance_title):
        return (True, 'has class with strong style-title: %s %s' % (dance_title, crew_title))
    if class_title and not wrong_style_text and (dance_text or crew_text):
        return (
            True,
            'has class title: %s, that contains strong description %s, %s' % (class_title, dance_text, crew_text)
        )
    if dance_class_title:
        return (True, 'has good dance class title: %s' % dance_class_title)
    if dance_class_text and not wrong_style_title:
        return (True, 'has good dance class: %s' % dance_class_text)
    return (False, 'nothing')
开发者ID:mikelambert,项目名称:dancedeets-monorepo,代码行数:51,代码来源:classifier.py


示例19: handleEvent

    def handleEvent(self, event):
        """Extract e-mail addresses from event data and notify listeners.

        Every address found in event.data is reported once per call as
        an EMAILADDR event, or as AFFILIATE_EMAILADDR when neither its
        domain nor the address itself matches the target, or when the
        incoming event type itself starts with "AFFILIATE_".  Addresses
        shorter than four characters or containing "%" are skipped.
        Always returns None.
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

        # NOTE(review): the "+@" between the local part and the domain
        # had been replaced by the e-mail obfuscation placeholder
        # "[email protected]", which made the pattern match nothing
        # useful; restored here -- confirm against upstream.
        pat = re.compile(r"([\%a-zA-Z\.0-9_\-\+]+@[a-zA-Z\.0-9\-]+\.[a-zA-Z\.0-9\-]+)")
        matches = re.findall(pat, eventData)
        myres = list()
        for match in matches:
            evttype = "EMAILADDR"
            if len(match) < 4:
                self.sf.debug("Likely invalid address: " + match)
                continue

            # Handle messed up encodings
            if "%" in match:
                self.sf.debug("Skipped address: " + match)
                continue

            # Get the domain and strip potential ending .
            mailDom = match.lower().split('@')[1].strip('.')
            if not self.getTarget().matches(mailDom) and not self.getTarget().matches(match):
                self.sf.debug("External domain, so possible affiliate e-mail")
                evttype = "AFFILIATE_EMAILADDR"

            if eventName.startswith("AFFILIATE_"):
                evttype = "AFFILIATE_EMAILADDR"

            self.sf.info("Found e-mail address: " + match)
            if isinstance(match, str):
                mail = unicode(match.strip('.'), 'utf-8', errors='replace')
            else:
                mail = match.strip('.')

            # Report each address only once per incoming event.
            if mail in myres:
                self.sf.debug("Already found from this source.")
                continue
            myres.append(mail)

            evt = SpiderFootEvent(evttype, mail, self.__name__, event)
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)

        return None
开发者ID:smicallef,项目名称:spiderfoot,代码行数:50,代码来源:sfp_email.py


示例20: get_tags

def get_tags(src, tags='page,title,revision,text'):
    """Map tag names to their namespace-qualified form.

    The XML namespace is read from the ``xmlns`` attribute found in the
    first two lines of ``src`` (eg: http://www.mediawiki.org/xml/export-0.3/).
    ``src`` is always rewound to the start afterwards.

    Returns a dict such as ``{'page': u'{namespace}page', ...}`` with one
    entry per comma-separated name in ``tags``.
    """
    try:
        header = src.readline() + src.readline()
        namespace = unicode(re.findall(r'xmlns="([^"]*)', header)[0])
        prefix = u'{%s}' % (namespace,)
        tag = dict((name, prefix + unicode(name)) for name in tags.split(','))
    finally:
        # Rewind even when the namespace could not be found.
        src.seek(0)

    return tag
开发者ID:davkal,项目名称:wiki-network,代码行数:15,代码来源:__init__.py



注:本文中的re2.findall函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python re2.match函数代码示例发布时间:2022-05-26
下一篇:
Python re2.compile函数代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap